| author | dim <dim@FreeBSD.org> | 2014-03-21 17:53:59 +0000 |
|---|---|---|
| committer | dim <dim@FreeBSD.org> | 2014-03-21 17:53:59 +0000 |
| commit | 9cedb8bb69b89b0f0c529937247a6a80cabdbaec (patch) | |
| tree | c978f0e9ec1ab92dc8123783f30b08a7fd1e2a39 /contrib/llvm/lib/CodeGen | |
| parent | 03fdc2934eb61c44c049a02b02aa974cfdd8a0eb (diff) | |
| download | FreeBSD-src-9cedb8bb69b89b0f0c529937247a6a80cabdbaec.zip, FreeBSD-src-9cedb8bb69b89b0f0c529937247a6a80cabdbaec.tar.gz | |
MFC 261991:
Upgrade our copy of llvm/clang to 3.4 release. This version supports
all of the features in the current working draft of the upcoming C++
standard, provisionally named C++1y.
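As a concrete illustration (a hypothetical snippet, not taken from the release
notes), return type deduction for ordinary functions is one of the C++1y draft
features the new clang accepts under -std=c++1y:

```c++
// C++1y (later standardized as C++14) return type deduction:
// the return type of twice() is deduced as int.
auto twice(int x) { return 2 * x; }
```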
The code generator's performance is greatly improved, and the loop
auto-vectorizer is now enabled at -Os and -O2 in addition to -O3. The
PowerPC backend has made several major improvements to code generation
quality and compile time, and the X86, SPARC, ARM32, AArch64 and SystemZ
backends have all seen major feature work.
Release notes for llvm and clang can be found here:
<http://llvm.org/releases/3.4/docs/ReleaseNotes.html>
<http://llvm.org/releases/3.4/tools/clang/docs/ReleaseNotes.html>
MFC 262121 (by emaste):
Update lldb for clang/llvm 3.4 import
This commit largely restores the lldb source to the upstream r196259
snapshot with the addition of threaded inferior support and a few bug
fixes.
Specific upstream lldb revisions restored include:
SVN git
181387 779e6ac
181703 7bef4e2
182099 b31044e
182650 f2dcf35
182683 0d91b80
183862 15c1774
183929 99447a6
184177 0b2934b
184948 4dc3761
184954 007e7bc
186990 eebd175
Sponsored by: DARPA, AFRL
MFC 262186 (by emaste):
Fix mismerge in r262121
A break statement was lost in the merge. The omission had no functional
impact, but the statement is restored to reduce the diff against upstream.
MFC 262303:
Pull in r197521 from upstream clang trunk (by rdivacky):
Use the integrated assembler by default on FreeBSD/ppc and ppc64.
Requested by: jhibbits
MFC 262611:
Pull in r196874 from upstream llvm trunk:
Fix a crash that occurs when PWD is invalid.
MCJIT needs to be able to run in hostile environments, even when PWD
is invalid. There's no need to crash MCJIT in this case.
The obvious fix is to simply leave MCContext's CompilationDir empty
when PWD can't be determined. This way, MCJIT clients and other clients
that link with LLVM don't need a valid working directory.
If we do want to guarantee valid CompilationDir, that should be done
only for clients of getCompilationDir(). This is as simple as checking
for an empty string.
The only current use of getCompilationDir is EmitGenDwarfInfo, which
won't conceivably run with an invalid working dir. However, in the
purely hypothetical and untestable case that this happens, the
AT_comp_dir attribute will be omitted from the compilation_unit DIE.
This should help fix assertions occurring with ports-mgmt/tinderbox
when it is using jails, which sometimes invalidates clang's current
working directory.
Reported by: decke
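A minimal sketch of the approach described above, with hypothetical names
(this is not the actual MCContext or EmitGenDwarfInfo code): leave the
compilation directory empty when the working directory cannot be determined,
and let clients treat the empty string as "unknown".

```c++
#include <string>
#include <unistd.h>

// Record "" instead of crashing when PWD is invalid.
static std::string detectCompilationDir() {
  char Buf[1024];
  if (::getcwd(Buf, sizeof(Buf)) == 0)
    return std::string(); // working directory unknown
  return std::string(Buf);
}

// Hypothetical client analogous to EmitGenDwarfInfo: when no directory is
// known, skip the attribute entirely, omitting AT_comp_dir from the DIE.
static void emitCompDirIfKnown(const std::string &CompilationDir) {
  if (CompilationDir.empty())
    return;
  // ... emit DW_AT_comp_dir here ...
}
```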
MFC 262809:
Pull in r203007 from upstream clang trunk:
Don't produce an alias between destructors with different calling conventions.
Fixes pr19007.
(Please note that this is an LLVM PR identifier, not a FreeBSD one.)
This should fix Firefox and/or libxul crashes (due to problems with
regparm/stdcall calling conventions) on i386.
Reported by: multiple users on freebsd-current
PR: bin/187103
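For background, a hedged, generic illustration (not the pr19007 test case) of
why such an alias is unsafe on i386: an stdcall callee pops its own arguments,
while a cdecl caller expects to pop them itself, so routing a call for one
convention to a symbol implemented with the other leaves the stack
misadjusted.

```c++
extern "C" {
// Callee pops its 8 bytes of arguments before returning.
void __attribute__((stdcall)) callee_stdcall(int a, int b);
// Caller pops the arguments after the call returns.
void callee_cdecl(int a, int b);
}
// If callee_cdecl's symbol were emitted merely as an alias of
// callee_stdcall, every call through callee_cdecl would corrupt the stack.
```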
MFC 263048:
Repair recognition of "CC" as an alias for the C++ compiler, since it
was silently broken by upstream for a Windows-specific use-case.
Apparently some versions of CMake still rely on this archaic feature...
Reported by: rakuco
MFC 263049:
Garbage collect the old way of adding the libstdc++ include directories
in clang's InitHeaderSearch.cpp. This has been superseded by David
Chisnall's commit in r255321.
Moreover, if libc++ is used, the libstdc++ include directories should
not be in the search path at all. These directories are now only used
if you pass -stdlib=libstdc++.
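A minimal sketch of the resulting policy, with hypothetical names and paths
(clang's real InitHeaderSearch logic is more involved): the libstdc++
directories are added only when -stdlib=libstdc++ is selected, and libc++
builds never see them.

```c++
#include <string>
#include <vector>

enum class CXXStdlib { LibCXX, LibStdCXX };

// Only the selected standard library's headers go on the include path.
static void addCXXIncludePaths(CXXStdlib Stdlib,
                               std::vector<std::string> &Paths) {
  if (Stdlib == CXXStdlib::LibStdCXX)
    Paths.push_back("/usr/include/c++/4.2"); // hypothetical libstdc++ dir
  else
    Paths.push_back("/usr/include/c++/v1");  // hypothetical libc++ dir
}
```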
Diffstat (limited to 'contrib/llvm/lib/CodeGen')
129 files changed, 14314 insertions, 10275 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index c50f8b5..2ee7767 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -247,8 +247,8 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
     if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
         IsImplicitDefUse(MI, MO)) {
       const unsigned Reg = MO.getReg();
-      PassthruRegs.insert(Reg);
-      for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+           SubRegs.isValid(); ++SubRegs)
         PassthruRegs.insert(*SubRegs);
     }
   }
@@ -782,7 +782,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
       if (MI == CriticalPathMI) {
         CriticalPathSU = CriticalPathStep(CriticalPathSU);
         CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
-      } else {
+      } else if (CriticalPathSet.any()) {
         ExcludeRegs = &CriticalPathSet;
       }
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index 4731af5..1600c67 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -202,161 +202,272 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
 }
 
 static bool isNoopBitcast(Type *T1, Type *T2,
-                          const TargetLowering& TLI) {
+                          const TargetLoweringBase& TLI) {
   return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
          (isa<VectorType>(T1) && isa<VectorType>(T2) &&
           TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
 }
 
-/// sameNoopInput - Return true if V1 == V2, else if either V1 or V2 is a noop
-/// (i.e., lowers to no machine code), look through it (and any transitive noop
-/// operands to it) and check if it has the same noop input value. This is
-/// used to determine if a tail call can be formed.
-static bool sameNoopInput(const Value *V1, const Value *V2,
-                          SmallVectorImpl<unsigned> &Els1,
-                          SmallVectorImpl<unsigned> &Els2,
-                          const TargetLowering &TLI) {
-  using std::swap;
-  bool swapParity = false;
-  bool equalEls = Els1 == Els2;
+/// Look through operations that will be free to find the earliest source of
+/// this value.
+///
+/// @param ValLoc If V has aggegate type, we will be interested in a particular
+/// scalar component. This records its address; the reverse of this list gives a
+/// sequence of indices appropriate for an extractvalue to locate the important
+/// value. This value is updated during the function and on exit will indicate
+/// similar information for the Value returned.
+///
+/// @param DataBits If this function looks through truncate instructions, this
+/// will record the smallest size attained.
+static const Value *getNoopInput(const Value *V,
+                                 SmallVectorImpl<unsigned> &ValLoc,
+                                 unsigned &DataBits,
+                                 const TargetLoweringBase &TLI) {
   while (true) {
-    if ((equalEls && V1 == V2) || isa<UndefValue>(V1) || isa<UndefValue>(V2)) {
-      if (swapParity)
-        // Revert to original Els1 and Els2 to avoid confusing recursive calls
-        swap(Els1, Els2);
-      return true;
-    }
-
     // Try to look through V1; if V1 is not an instruction, it can't be looked
     // through.
-    const Instruction *I = dyn_cast<Instruction>(V1);
+    const Instruction *I = dyn_cast<Instruction>(V);
+    if (!I || I->getNumOperands() == 0) return V;
     const Value *NoopInput = 0;
-    if (I != 0 && I->getNumOperands() > 0) {
-      Value *Op = I->getOperand(0);
-      if (isa<TruncInst>(I)) {
-        // Look through truly no-op truncates.
-        if (TLI.isTruncateFree(Op->getType(), I->getType()))
-          NoopInput = Op;
-      } else if (isa<BitCastInst>(I)) {
-        // Look through truly no-op bitcasts.
-        if (isNoopBitcast(Op->getType(), I->getType(), TLI))
-          NoopInput = Op;
-      } else if (isa<GetElementPtrInst>(I)) {
-        // Look through getelementptr
-        if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
-          NoopInput = Op;
-      } else if (isa<IntToPtrInst>(I)) {
-        // Look through inttoptr.
-        // Make sure this isn't a truncating or extending cast. We could
-        // support this eventually, but don't bother for now.
-        if (!isa<VectorType>(I->getType()) &&
-            TLI.getPointerTy().getSizeInBits() ==
-              cast<IntegerType>(Op->getType())->getBitWidth())
-          NoopInput = Op;
-      } else if (isa<PtrToIntInst>(I)) {
-        // Look through ptrtoint.
-        // Make sure this isn't a truncating or extending cast. We could
-        // support this eventually, but don't bother for now.
-        if (!isa<VectorType>(I->getType()) &&
-            TLI.getPointerTy().getSizeInBits() ==
-              cast<IntegerType>(I->getType())->getBitWidth())
-          NoopInput = Op;
-      } else if (isa<CallInst>(I)) {
-        // Look through call
-        for (User::const_op_iterator i = I->op_begin(),
-                                     // Skip Callee
-                                     e = I->op_end() - 1;
-             i != e; ++i) {
-          unsigned attrInd = i - I->op_begin() + 1;
-          if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
-              isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
-            NoopInput = *i;
-            break;
-          }
+
+    Value *Op = I->getOperand(0);
+    if (isa<BitCastInst>(I)) {
+      // Look through truly no-op bitcasts.
+      if (isNoopBitcast(Op->getType(), I->getType(), TLI))
+        NoopInput = Op;
+    } else if (isa<GetElementPtrInst>(I)) {
+      // Look through getelementptr
+      if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+        NoopInput = Op;
+    } else if (isa<IntToPtrInst>(I)) {
+      // Look through inttoptr.
+      // Make sure this isn't a truncating or extending cast. We could
+      // support this eventually, but don't bother for now.
+      if (!isa<VectorType>(I->getType()) &&
+          TLI.getPointerTy().getSizeInBits() ==
+          cast<IntegerType>(Op->getType())->getBitWidth())
+        NoopInput = Op;
+    } else if (isa<PtrToIntInst>(I)) {
+      // Look through ptrtoint.
+      // Make sure this isn't a truncating or extending cast. We could
+      // support this eventually, but don't bother for now.
+      if (!isa<VectorType>(I->getType()) &&
+          TLI.getPointerTy().getSizeInBits() ==
+          cast<IntegerType>(I->getType())->getBitWidth())
+        NoopInput = Op;
+    } else if (isa<TruncInst>(I) &&
+               TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
+      DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
+      NoopInput = Op;
+    } else if (isa<CallInst>(I)) {
+      // Look through call (skipping callee)
+      for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1;
+           i != e; ++i) {
+        unsigned attrInd = i - I->op_begin() + 1;
+        if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+            isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+          NoopInput = *i;
+          break;
         }
-      } else if (isa<InvokeInst>(I)) {
-        // Look through invoke
-        for (User::const_op_iterator i = I->op_begin(),
-                                     // Skip BB, BB, Callee
-                                     e = I->op_end() - 3;
-             i != e; ++i) {
-          unsigned attrInd = i - I->op_begin() + 1;
-          if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
-              isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
-            NoopInput = *i;
-            break;
-          }
+      }
+    } else if (isa<InvokeInst>(I)) {
+      // Look through invoke (skipping BB, BB, Callee)
+      for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3;
+           i != e; ++i) {
+        unsigned attrInd = i - I->op_begin() + 1;
+        if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+            isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+          NoopInput = *i;
+          break;
         }
       }
+    } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
+      // Value may come from either the aggregate or the scalar
+      ArrayRef<unsigned> InsertLoc = IVI->getIndices();
+      if (std::equal(InsertLoc.rbegin(), InsertLoc.rend(),
+                     ValLoc.rbegin())) {
+        // The type being inserted is a nested sub-type of the aggregate; we
+        // have to remove those initial indices to get the location we're
+        // interested in for the operand.
+        ValLoc.resize(ValLoc.size() - InsertLoc.size());
+        NoopInput = IVI->getInsertedValueOperand();
+      } else {
+        // The struct we're inserting into has the value we're interested in, no
+        // change of address.
+        NoopInput = Op;
+      }
+    } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
+      // The part we're interested in will inevitably be some sub-section of the
+      // previous aggregate. Combine the two paths to obtain the true address of
+      // our element.
+      ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
+      std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(),
+                std::back_inserter(ValLoc));
+      NoopInput = Op;
     }
+
+    // Terminate if we couldn't find anything to look through.
+    if (!NoopInput)
+      return V;
 
-    if (NoopInput) {
-      V1 = NoopInput;
-      continue;
-    }
+    V = NoopInput;
+  }
+}
+
+/// Return true if this scalar return value only has bits discarded on its path
+/// from the "tail call" to the "ret". This includes the obvious noop
+/// instructions handled by getNoopInput above as well as free truncations (or
+/// extensions prior to the call).
+static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
+                                 SmallVectorImpl<unsigned> &RetIndices,
+                                 SmallVectorImpl<unsigned> &CallIndices,
+                                 bool AllowDifferingSizes,
+                                 const TargetLoweringBase &TLI) {
+
+  // Trace the sub-value needed by the return value as far back up the graph as
+  // possible, in the hope that it will intersect with the value produced by the
+  // call. In the simple case with no "returned" attribute, the hope is actually
+  // that we end up back at the tail call instruction itself.
+  unsigned BitsRequired = UINT_MAX;
+  RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI);
+
+  // If this slot in the value returned is undef, it doesn't matter what the
+  // call puts there, it'll be fine.
+  if (isa<UndefValue>(RetVal))
+    return true;
 
-    // If we already swapped, avoid infinite loop
-    if (swapParity)
-      break;
+  // Now do a similar search up through the graph to find where the value
+  // actually returned by the "tail call" comes from. In the simple case without
+  // a "returned" attribute, the search will be blocked immediately and the loop
+  // a Noop.
+  unsigned BitsProvided = UINT_MAX;
+  CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI);
+
+  // There's no hope if we can't actually trace them to (the same part of!) the
+  // same value.
+  if (CallVal != RetVal || CallIndices != RetIndices)
+    return false;
+
+  // However, intervening truncates may have made the call non-tail. Make sure
+  // all the bits that are needed by the "ret" have been provided by the "tail
+  // call". FIXME: with sufficiently cunning bit-tracking, we could look through
+  // extensions too.
+  if (BitsProvided < BitsRequired ||
+      (!AllowDifferingSizes && BitsProvided != BitsRequired))
+    return false;
 
-    // Otherwise, swap V1<->V2, Els1<->Els2
-    swap(V1, V2);
-    swap(Els1, Els2);
-    swapParity = !swapParity;
+  return true;
+}
+
+/// For an aggregate type, determine whether a given index is within bounds or
+/// not.
+static bool indexReallyValid(CompositeType *T, unsigned Idx) {
+  if (ArrayType *AT = dyn_cast<ArrayType>(T))
+    return Idx < AT->getNumElements();
+
+  return Idx < cast<StructType>(T)->getNumElements();
+}
+
+/// Move the given iterators to the next leaf type in depth first traversal.
+///
+/// Performs a depth-first traversal of the type as specified by its arguments,
+/// stopping at the next leaf node (which may be a legitimate scalar type or an
+/// empty struct or array).
+///
+/// @param SubTypes List of the partial components making up the type from
+/// outermost to innermost non-empty aggregate. The element currently
+/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1).
+///
+/// @param Path Set of extractvalue indices leading from the outermost type
+/// (SubTypes[0]) to the leaf node currently represented.
+///
+/// @returns true if a new type was found, false otherwise. Calling this
+/// function again on a finished iterator will repeatedly return
+/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty
+/// aggregate or a non-aggregate
+static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes,
+                                  SmallVectorImpl<unsigned> &Path) {
+  // First march back up the tree until we can successfully increment one of the
+  // coordinates in Path.
+  while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) {
+    Path.pop_back();
+    SubTypes.pop_back();
   }
 
-  for (unsigned n = 0; n < 2; ++n) {
-    if (isa<InsertValueInst>(V1)) {
-      if (isa<StructType>(V1->getType())) {
-        // Look through insertvalue
-        unsigned i, e;
-        for (i = 0, e = cast<StructType>(V1->getType())->getNumElements();
-             i != e; ++i) {
-          const Value *InScalar = FindInsertedValue(const_cast<Value*>(V1), i);
-          if (InScalar == 0)
-            break;
-          Els1.push_back(i);
-          if (!sameNoopInput(InScalar, V2, Els1, Els2, TLI)) {
-            Els1.pop_back();
-            break;
-          }
-          Els1.pop_back();
-        }
-        if (i == e) {
-          if (swapParity)
-            swap(Els1, Els2);
-          return true;
-        }
-      }
-    } else if (!Els1.empty() && isa<ExtractValueInst>(V1)) {
-      const ExtractValueInst *EVI = cast<ExtractValueInst>(V1);
-      unsigned i = Els1.back();
-      // If the scalar value being inserted is an extractvalue of the right
-      // index from the call, then everything is good.
-      if (isa<StructType>(EVI->getOperand(0)->getType()) &&
-          EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) {
-        // Look through extractvalue
-        Els1.pop_back();
-        if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) {
-          Els1.push_back(i);
-          if (swapParity)
-            swap(Els1, Els2);
-          return true;
-        }
-        Els1.push_back(i);
-      }
-    }
+  // If we reached the top, then the iterator is done.
+  if (Path.empty())
+    return false;
 
-    swap(V1, V2);
-    swap(Els1, Els2);
-    swapParity = !swapParity;
+  // We know there's *some* valid leaf now, so march back down the tree picking
+  // out the left-most element at each node.
+  ++Path.back();
+  Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back());
+  while (DeeperType->isAggregateType()) {
+    CompositeType *CT = cast<CompositeType>(DeeperType);
+    if (!indexReallyValid(CT, 0))
+      return true;
+
+    SubTypes.push_back(CT);
+    Path.push_back(0);
+
+    DeeperType = CT->getTypeAtIndex(0U);
   }
 
-  if (swapParity)
-    swap(Els1, Els2);
-  return false;
+  return true;
 }
 
+/// Find the first non-empty, scalar-like type in Next and setup the iterator
+/// components.
+///
+/// Assuming Next is an aggregate of some kind, this function will traverse the
+/// tree from left to right (i.e. depth-first) looking for the first
+/// non-aggregate type which will play a role in function return.
+///
+/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup
+/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first
+/// i32 in that type.
+static bool firstRealType(Type *Next,
+                          SmallVectorImpl<CompositeType *> &SubTypes,
+                          SmallVectorImpl<unsigned> &Path) {
+  // First initialise the iterator components to the first "leaf" node
+  // (i.e. node with no valid sub-type at any index, so {} does count as a leaf
+  // despite nominally being an aggregate).
+  while (Next->isAggregateType() &&
+         indexReallyValid(cast<CompositeType>(Next), 0)) {
+    SubTypes.push_back(cast<CompositeType>(Next));
+    Path.push_back(0);
+    Next = cast<CompositeType>(Next)->getTypeAtIndex(0U);
+  }
+
+  // If there's no Path now, Next was originally scalar already (or empty
+  // leaf). We're done.
+  if (Path.empty())
+    return true;
+
+  // Otherwise, use normal iteration to keep looking through the tree until we
+  // find a non-aggregate type.
+  while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) {
+    if (!advanceToNextLeafType(SubTypes, Path))
+      return false;
+  }
+
+  return true;
+}
+
+/// Set the iterator data-structures to the next non-empty, non-aggregate
+/// subtype.
+static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
+                         SmallVectorImpl<unsigned> &Path) {
+  do {
+    if (!advanceToNextLeafType(SubTypes, Path))
+      return false;
+
+    assert(!Path.empty() && "found a leaf but didn't set the path?");
+  } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType());
+
+  return true;
+}
+
+
 /// Test if the given instruction is in a position to be optimized
 /// with a tail-call. This roughly means that it's in a block with
 /// a return and there's nothing that needs to be scheduled
@@ -399,6 +510,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
       return false;
   }
 
+  return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, TLI);
+}
+
+bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
+                                           const Instruction *I,
+                                           const ReturnInst *Ret,
+                                           const TargetLoweringBase &TLI) {
   // If the block ends with a void return or unreachable, it doesn't matter
   // what the call's return type is.
   if (!Ret || Ret->getNumOperands() == 0) return true;
@@ -407,22 +525,85 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
   // return type is.
   if (isa<UndefValue>(Ret->getOperand(0))) return true;
 
-  // Conservatively require the attributes of the call to match those of
-  // the return. Ignore noalias because it doesn't affect the call sequence.
-  const Function *F = ExitBB->getParent();
-  AttributeSet CallerAttrs = F->getAttributes();
-  if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
-        removeAttribute(Attribute::NoAlias) !=
-      AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
-        removeAttribute(Attribute::NoAlias))
-    return false;
+  // Make sure the attributes attached to each return are compatible.
+  AttrBuilder CallerAttrs(F->getAttributes(),
+                          AttributeSet::ReturnIndex);
+  AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
+                          AttributeSet::ReturnIndex);
+
+  // Noalias is completely benign as far as calling convention goes, it
+  // shouldn't affect whether the call is a tail call.
+  CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias);
+  CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias);
+
+  bool AllowDifferingSizes = true;
+  if (CallerAttrs.contains(Attribute::ZExt)) {
+    if (!CalleeAttrs.contains(Attribute::ZExt))
+      return false;
 
-  // It's not safe to eliminate the sign / zero extension of the return value.
-  if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
-      CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+    AllowDifferingSizes = false;
+    CallerAttrs.removeAttribute(Attribute::ZExt);
+    CalleeAttrs.removeAttribute(Attribute::ZExt);
+  } else if (CallerAttrs.contains(Attribute::SExt)) {
+    if (!CalleeAttrs.contains(Attribute::SExt))
+      return false;
+
+    AllowDifferingSizes = false;
+    CallerAttrs.removeAttribute(Attribute::SExt);
+    CalleeAttrs.removeAttribute(Attribute::SExt);
+  }
+
+  // If they're still different, there's some facet we don't understand
+  // (currently only "inreg", but in future who knows). It may be OK but the
+  // only safe option is to reject the tail call.
+  if (CallerAttrs != CalleeAttrs)
     return false;
 
-  // Otherwise, make sure the return value and I have the same value
-  SmallVector<unsigned, 4> Els1, Els2;
-  return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI);
+  const Value *RetVal = Ret->getOperand(0), *CallVal = I;
+  SmallVector<unsigned, 4> RetPath, CallPath;
+  SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes;
+
+  bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
+  bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);
+
+  // Nothing's actually returned, it doesn't matter what the callee put there
+  // it's a valid tail call.
+  if (RetEmpty)
+    return true;
+
+  // Iterate pairwise through each of the value types making up the tail call
+  // and the corresponding return. For each one we want to know whether it's
+  // essentially going directly from the tail call to the ret, via operations
+  // that end up not generating any code.
+  //
+  // We allow a certain amount of covariance here. For example it's permitted
+  // for the tail call to define more bits than the ret actually cares about
+  // (e.g. via a truncate).
+  do {
+    if (CallEmpty) {
+      // We've exhausted the values produced by the tail call instruction, the
+      // rest are essentially undef. The type doesn't really matter, but we need
+      // *something*.
+      Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back());
+      CallVal = UndefValue::get(SlotType);
+    }
+
+    // The manipulations performed when we're looking through an insertvalue or
+    // an extractvalue would happen at the front of the RetPath list, so since
+    // we have to copy it anyway it's more efficient to create a reversed copy.
+    using std::copy;
+    SmallVector<unsigned, 4> TmpRetPath, TmpCallPath;
+    copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath));
+    copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath));
+
+    // Finally, we can check whether the value produced by the tail call at this
+    // index is compatible with the value we return.
+    if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
+                              AllowDifferingSizes, TLI))
+      return false;
+
+    CallEmpty = !nextRealType(CallSubTypes, CallPath);
+  } while(nextRealType(RetSubTypes, RetPath));
+
+  return true;
 }
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index 188047d..5d82dd9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -47,13 +47,18 @@ ARMException::ARMException(AsmPrinter *A)
 
 ARMException::~ARMException() {}
 
+ARMTargetStreamer &ARMException::getTargetStreamer() {
+  MCTargetStreamer &TS = Asm->OutStreamer.getTargetStreamer();
+  return static_cast<ARMTargetStreamer &>(TS);
+}
+
 void ARMException::EndModule() {
 }
 
 /// BeginFunction - Gather pre-function exception information. Assumes it's
 /// being emitted immediately after the function entry point.
 void ARMException::BeginFunction(const MachineFunction *MF) {
-  Asm->OutStreamer.EmitFnStart();
+  getTargetStreamer().emitFnStart();
   if (Asm->MF->getFunction()->needsUnwindTableEntry())
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
                                                   Asm->getFunctionNumber()));
@@ -62,8 +67,9 @@ void ARMException::BeginFunction(const MachineFunction *MF) {
 
 /// EndFunction - Gather and emit post-function exception information.
 ///
 void ARMException::EndFunction() {
+  ARMTargetStreamer &ATS = getTargetStreamer();
   if (!Asm->MF->getFunction()->needsUnwindTableEntry())
-    Asm->OutStreamer.EmitCantUnwind();
+    ATS.emitCantUnwind();
   else {
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
                                                   Asm->getFunctionNumber()));
@@ -76,13 +82,13 @@ void ARMException::EndFunction() {
       // Emit references to personality.
       if (const Function * Personality =
          MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
-        MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+        MCSymbol *PerSym = Asm->getSymbol(Personality);
         Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
-        Asm->OutStreamer.EmitPersonality(PerSym);
+        ATS.emitPersonality(PerSym);
       }
 
       // Emit .handlerdata directive.
-      Asm->OutStreamer.EmitHandlerData();
+      ATS.emitHandlerData();
 
       // Emit actual exception table
       EmitExceptionTable();
@@ -90,7 +96,7 @@ void ARMException::EndFunction() {
     }
   }
 
-  Asm->OutStreamer.EmitFnEnd();
+  ATS.emitFnEnd();
 }
 
 void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 84162ac..308b0e0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -42,16 +42,18 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
 using namespace llvm;
 
-static const char *DWARFGroupName = "DWARF Emission";
-static const char *DbgTimerName = "DWARF Debug Writer";
-static const char *EHTimerName = "DWARF Exception Writer";
+static const char *const DWARFGroupName = "DWARF Emission";
+static const char *const DbgTimerName = "DWARF Debug Writer";
+static const char *const EHTimerName = "DWARF Exception Writer";
 
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
@@ -93,11 +95,11 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
 
 AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
   : MachineFunctionPass(ID),
-    TM(tm), MAI(tm.getMCAsmInfo()),
+    TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()),
     OutContext(Streamer.getContext()),
     OutStreamer(Streamer),
     LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
-  DD = 0; DE = 0; MMI = 0; LI = 0;
+  DD = 0; DE = 0; MMI = 0; LI = 0; MF = 0;
   CurrentFnSym = CurrentFnSymForSize = 0;
   GCMetadataPrinters = 0;
   VerboseAsm = Streamer.isVerboseAsm();
@@ -154,8 +156,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
-  OutStreamer.InitStreamer();
-
   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
   MMI->AnalyzeModule(M);
 
@@ -163,7 +163,9 @@ bool AsmPrinter::doInitialization(Module &M) {
   const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
     .Initialize(OutContext, TM);
 
-  Mang = new Mangler(OutContext, *TM.getDataLayout());
+  OutStreamer.InitStreamer();
+
+  Mang = new Mangler(&TM);
 
   // Allow the target to emit any magic that it wants at the start of the file.
   EmitStartOfAsmFile(M);
 
@@ -211,12 +213,12 @@ bool AsmPrinter::doInitialization(Module &M) {
     llvm_unreachable("Unknown exception type.");
 }
 
-void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
-  switch ((GlobalValue::LinkageTypes)Linkage) {
+void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
+  GlobalValue::LinkageTypes Linkage = GV->getLinkage();
+  switch (Linkage) {
   case GlobalValue::CommonLinkage:
   case GlobalValue::LinkOnceAnyLinkage:
   case GlobalValue::LinkOnceODRLinkage:
-  case GlobalValue::LinkOnceODRAutoHideLinkage:
   case GlobalValue::WeakAnyLinkage:
   case GlobalValue::WeakODRLinkage:
   case GlobalValue::LinkerPrivateWeakLinkage:
@@ -224,8 +226,19 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
       // .globl _foo
       OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
 
-      if ((GlobalValue::LinkageTypes)Linkage !=
-          GlobalValue::LinkOnceODRAutoHideLinkage)
+      bool CanBeHidden = false;
+
+      if (Linkage == GlobalValue::LinkOnceODRLinkage) {
+        if (GV->hasUnnamedAddr()) {
+          CanBeHidden = true;
+        } else {
+          GlobalStatus GS;
+          if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared)
+            CanBeHidden = true;
+        }
+      }
+
+      if (!CanBeHidden)
         // .weak_definition _foo
         OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
       else
@@ -238,7 +251,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
       // .weak _foo
       OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
     }
-    break;
+    return;
   case GlobalValue::DLLExportLinkage:
   case GlobalValue::AppendingLinkage:
     // FIXME: appending linkage variables should go into a section of
@@ -247,16 +260,23 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
     // If external or appending, declare as a global symbol.
     // .globl _foo
     OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
-    break;
+    return;
   case GlobalValue::PrivateLinkage:
   case GlobalValue::InternalLinkage:
   case GlobalValue::LinkerPrivateLinkage:
-    break;
-  default:
-    llvm_unreachable("Unknown linkage type!");
+    return;
+  case GlobalValue::AvailableExternallyLinkage:
+    llvm_unreachable("Should never emit this");
+  case GlobalValue::DLLImportLinkage:
+  case GlobalValue::ExternalWeakLinkage:
+    llvm_unreachable("Don't know how to emit these");
   }
+  llvm_unreachable("Unknown linkage type!");
 }
 
+MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
+  return getObjFileLowering().getSymbol(*Mang, GV);
+}
 
 /// EmitGlobalVariable - Emit the specified global variable to the .s file.
 void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
@@ -272,7 +292,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
     }
   }
 
-  MCSymbol *GVSym = Mang->getSymbol(GV);
+  MCSymbol *GVSym = getSymbol(GV);
   EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
 
   if (!GV->hasInitializer())   // External globals require no extra code.
@@ -283,13 +303,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 
   SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
 
-  const DataLayout *TD = TM.getDataLayout();
-  uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+  const DataLayout *DL = TM.getDataLayout();
+  uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType());
 
   // If the alignment is specified, we *must* obey it. Overaligning a global
   // with a specified alignment is a prompt way to break globals emitted to
   // sections and expected to be contiguous (e.g. ObjC metadata).
-  unsigned AlignLog = getGVAlignmentLog2(GV, *TD);
+  unsigned AlignLog = getGVAlignmentLog2(GV, *DL);
+
+  if (DD)
+    DD->setSymbolSize(GVSym, Size);
 
   // Handle common and BSS local symbols (.lcomm).
   if (GVKind.isCommon() || GVKind.isBSSLocal()) {
@@ -367,9 +390,10 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
     MCSymbol *MangSym =
       OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
 
-    if (GVKind.isThreadBSS())
+    if (GVKind.isThreadBSS()) {
+      TheSection = getObjFileLowering().getTLSBSSSection();
       OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
-    else if (GVKind.isThreadData()) {
+    } else if (GVKind.isThreadData()) {
       OutStreamer.SwitchSection(TheSection);
 
       EmitAlignment(AlignLog, GV);
@@ -386,16 +410,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
     OutStreamer.SwitchSection(TLVSect);
     // Emit the linkage here.
-    EmitLinkage(GV->getLinkage(), GVSym);
+    EmitLinkage(GV, GVSym);
     OutStreamer.EmitLabel(GVSym);
 
     // Three pointers in size:
     //   - __tlv_bootstrap - used to make sure support exists
     //   - spare pointer, used when mapped by the runtime
     //   - pointer to mangled symbol above with initializer
-    unsigned PtrSize = TD->getPointerSizeInBits()/8;
+    unsigned PtrSize = DL->getPointerTypeSize(GV->getType());
     OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
-                                PtrSize);
+                                PtrSize);
     OutStreamer.EmitIntValue(0, PtrSize);
     OutStreamer.EmitSymbolValue(MangSym, PtrSize);
 
@@ -405,7 +429,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 
   OutStreamer.SwitchSection(TheSection);
 
-  EmitLinkage(GV->getLinkage(), GVSym);
+  EmitLinkage(GV, GVSym);
   EmitAlignment(AlignLog, GV);
 
   OutStreamer.EmitLabel(GVSym);
@@ -431,7 +455,7 @@ void AsmPrinter::EmitFunctionHeader() {
   OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
   EmitVisibility(CurrentFnSym, F->getVisibility());
 
-  EmitLinkage(F->getLinkage(), CurrentFnSym);
+  EmitLinkage(F, CurrentFnSym);
   EmitAlignment(MF->getAlignment(), F);
 
   if (MAI->hasDotTypeDotSizeDirective())
@@ -457,16 +481,6 @@ void AsmPrinter::EmitFunctionHeader() {
       OutStreamer.EmitLabel(DeadBlockSyms[i]);
   }
 
-  // Add some workaround for linkonce linkage on Cygwin\MinGW.
-  if (MAI->getLinkOnceDirective() != 0 &&
-      (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) {
-    // FIXME: What is this?
-    MCSymbol *FakeStub =
-      OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+
-                                   CurrentFnSym->getName());
-    OutStreamer.EmitLabel(FakeStub);
-  }
-
   // Emit pre-function debug and/or EH information.
   if (DE) {
     NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
@@ -476,6 +490,10 @@ void AsmPrinter::EmitFunctionHeader() {
     NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
     DD->beginFunction(MF);
   }
+
+  // Emit the prefix data.
+  if (F->hasPrefixData())
+    EmitGlobalConstant(F->getPrefixData());
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -528,11 +546,11 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
 
 /// emitImplicitDef - This method emits the specified machine instruction
 /// that is an implicit def.
-static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
+void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
   unsigned RegNo = MI->getOperand(0).getReg();
-  AP.OutStreamer.AddComment(Twine("implicit-def: ") +
-                            AP.TM.getRegisterInfo()->getName(RegNo));
-  AP.OutStreamer.AddBlankLine();
+  OutStreamer.AddComment(Twine("implicit-def: ") +
+                         TM.getRegisterInfo()->getName(RegNo));
+  OutStreamer.AddBlankLine();
 }
 
 static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
@@ -562,10 +580,17 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
 
   // cast away const; DIetc do not take const operands for some reason.
   DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
-  if (V.getContext().isSubprogram())
-    OS << DISubprogram(V.getContext()).getDisplayName() << ":";
+  if (V.getContext().isSubprogram()) {
+    StringRef Name = DISubprogram(V.getContext()).getDisplayName();
+    if (!Name.empty())
+      OS << Name << ":";
+  }
   OS << V.getName() << " <- ";
 
+  // The second operand is only an offset if it's an immediate.
+  bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
+  int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0;
+
   // Register or immediate value. Register 0 means undef.
   if (MI->getOperand(0).isFPImm()) {
     APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
@@ -586,18 +611,31 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
   } else if (MI->getOperand(0).isCImm()) {
     MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
   } else {
-    assert(MI->getOperand(0).isReg() && "Unknown operand type");
-    if (MI->getOperand(0).getReg() == 0) {
+    unsigned Reg;
+    if (MI->getOperand(0).isReg()) {
+      Reg = MI->getOperand(0).getReg();
+    } else {
+      assert(MI->getOperand(0).isFI() && "Unknown operand type");
+      const TargetFrameLowering *TFI = AP.TM.getFrameLowering();
+      Offset += TFI->getFrameIndexReference(*AP.MF,
+                                            MI->getOperand(0).getIndex(), Reg);
+      Deref = true;
+    }
+    if (Reg == 0) {
       // Suppress offset, it is not meaningful here.
       OS << "undef";
       // NOTE: Want this comment at start of line, don't emit with AddComment.
       AP.OutStreamer.EmitRawText(OS.str());
       return true;
     }
-    OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
+    if (Deref)
+      OS << '[';
+    OS << AP.TM.getRegisterInfo()->getName(Reg);
   }
 
-  OS << '+' << MI->getOperand(1).getImm();
+  if (Deref)
+    OS << '+' << Offset << ']';
+
   // NOTE: Want this comment at start of line, don't emit with AddComment.
   AP.OutStreamer.EmitRawText(OS.str());
   return true;
 
@@ -624,7 +662,7 @@ bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
 }
 
 void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
-  MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+  const MCSymbol *Label = MI.getOperand(0).getMCSymbol();
 
   if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
     return;
@@ -635,14 +673,14 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
   if (MMI->getCompactUnwindEncoding() != 0)
     OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding());
 
-  MachineModuleInfo &MMI = MF->getMMI();
-  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+  const MachineModuleInfo &MMI = MF->getMMI();
+  const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions();
   bool FoundOne = false;
   (void)FoundOne;
-  for (std::vector<MachineMove>::iterator I = Moves.begin(),
-       E = Moves.end(); I != E; ++I) {
+  for (std::vector<MCCFIInstruction>::const_iterator I = Instrs.begin(),
+       E = Instrs.end(); I != E; ++I) {
     if (I->getLabel() == Label) {
-      EmitCFIFrameMove(*I);
+      emitCFIInstruction(*I);
       FoundOne = true;
     }
   }
@@ -702,7 +740,7 @@ void AsmPrinter::EmitFunctionBody() {
       }
       break;
     case TargetOpcode::IMPLICIT_DEF:
-      if (isVerbose()) emitImplicitDef(II, *this);
+      if (isVerbose()) emitImplicitDef(II);
       break;
    case TargetOpcode::KILL:
      if (isVerbose()) emitKill(II, *this);
@@ -790,16 +828,9 @@ void AsmPrinter::EmitFunctionBody() {
 
   OutStreamer.AddBlankLine();
 }
 
-/// getDebugValueLocation - Get location information encoded by DBG_VALUE
-/// operands.
-MachineLocation AsmPrinter::
-getDebugValueLocation(const MachineInstr *MI) const {
-  // Target specific DBG_VALUE instructions are handled by each target.
-  return MachineLocation();
-}
-
 /// EmitDwarfRegOp - Emit dwarf register operation.
-void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc,
+                                bool Indirect) const {
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
   int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
 
@@ -817,7 +848,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
   // caller might be in the middle of an dwarf expression. We should
   // probably assert that Reg >= 0 once debug info generation is more mature.
 
-  if (MLoc.isIndirect()) {
+  if (MLoc.isIndirect() || Indirect) {
     if (Reg < 32) {
       OutStreamer.AddComment(
         dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
@@ -828,7 +859,9 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
       OutStreamer.AddComment(Twine(Reg));
       EmitULEB128(Reg);
     }
-    EmitSLEB128(MLoc.getOffset());
+    EmitSLEB128(!MLoc.isIndirect() ? 0 : MLoc.getOffset());
+    if (MLoc.isIndirect() && Indirect)
+      EmitInt8(dwarf::DW_OP_deref);
   } else {
     if (Reg < 32) {
       OutStreamer.AddComment(
@@ -860,7 +893,7 @@ bool AsmPrinter::doFinalization(Module &M) {
     if (V == GlobalValue::DefaultVisibility)
       continue;
 
-    MCSymbol *Name = Mang->getSymbol(&F);
+    MCSymbol *Name = getSymbol(&F);
     EmitVisibility(Name, V, false);
   }
 
@@ -870,6 +903,9 @@ bool AsmPrinter::doFinalization(Module &M) {
   if (!ModuleFlags.empty())
     getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM);
 
+  // Make sure we wrote out everything we need.
+  OutStreamer.Flush();
+
   // Finalize debug and EH information.
   if (DE) {
     {
@@ -897,12 +933,12 @@ bool AsmPrinter::doFinalization(Module &M) {
     for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
          I != E; ++I) {
       if (!I->hasExternalWeakLinkage()) continue;
-      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+      OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference);
     }
 
     for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
       if (!I->hasExternalWeakLinkage()) continue;
-      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+      OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference);
     }
   }
 
@@ -910,14 +946,19 @@ bool AsmPrinter::doFinalization(Module &M) {
     OutStreamer.AddBlankLine();
     for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
          I != E; ++I) {
-      MCSymbol *Name = Mang->getSymbol(I);
+      MCSymbol *Name = getSymbol(I);
 
       const GlobalValue *GV = I->getAliasedGlobal();
-      MCSymbol *Target = Mang->getSymbol(GV);
+      if (GV->isDeclaration()) {
+        report_fatal_error(Name->getName() +
+                           ": Target doesn't support aliases to declarations");
+      }
+
+      MCSymbol *Target = getSymbol(GV);
 
       if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
         OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
-      else if (I->hasWeakLinkage())
+      else if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
        OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
      else
        assert(I->hasLocalLinkage() && "Invalid alias linkage");
@@ -936,6 +977,9 @@ bool AsmPrinter::doFinalization(Module &M) {
     if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
       MP->finishAssembly(*this);
 
+  // Emit llvm.ident metadata in an '.ident' directive.
+  EmitModuleIdents(M);
+
   // If we don't have any trampolines, then we don't require stack memory
   // to be executable. Some targets have a directive to declare this.
   Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
@@ -959,7 +1003,7 @@ bool AsmPrinter::doFinalization(Module &M) {
 void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   this->MF = &MF;
   // Get the function symbol.
-  CurrentFnSym = Mang->getSymbol(MF.getFunction());
+  CurrentFnSym = getSymbol(MF.getFunction());
   CurrentFnSymForSize = CurrentFnSym;
 
   if (isVerbose())
@@ -1266,16 +1310,10 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
     const GlobalValue *GV =
       dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
     if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
-      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip);
+      OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip);
   }
 }
 
-typedef std::pair<unsigned, Constant*> Structor;
-
-static bool priority_order(const Structor& lhs, const Structor& rhs) {
-  return lhs.first < rhs.first;
-}
-
 /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
 /// priority.
 void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
@@ -1292,6 +1330,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
       !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
 
   // Gather the structors in a form that's convenient for sorting by priority.
+  typedef std::pair<unsigned, Constant *> Structor;
   SmallVector<Structor, 8> Structors;
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
     ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
@@ -1305,9 +1344,9 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
   }
 
   // Emit the function pointers in the target-specific order
-  const DataLayout *TD = TM.getDataLayout();
-  unsigned Align = Log2_32(TD->getPointerPrefAlignment());
-  std::stable_sort(Structors.begin(), Structors.end(), priority_order);
+  const DataLayout *DL = TM.getDataLayout();
+  unsigned Align = Log2_32(DL->getPointerPrefAlignment());
+  std::stable_sort(Structors.begin(), Structors.end(), less_first());
   for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
     const MCSection *OutputSection =
       (isCtor ?
@@ -1320,6 +1359,21 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
   }
 }
 
+void AsmPrinter::EmitModuleIdents(Module &M) {
+  if (!MAI->hasIdentDirective())
+    return;
+
+  if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) {
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+      const MDNode *N = NMD->getOperand(i);
+      assert(N->getNumOperands() == 1 &&
+             "llvm.ident metadata entry can have only one operand");
+      const MDString *S = cast<MDString>(N->getOperand(0));
+      OutStreamer.EmitIdent(S->getString());
+    }
+  }
+}
+
 //===--------------------------------------------------------------------===//
 // Emission and print routines
 //
@@ -1385,12 +1439,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
                                  OutContext);
 
   if (!MAI->hasSetDirective())
-    OutStreamer.EmitValue(Diff, 4);
+    OutStreamer.EmitValue(Diff, Size);
   else {
     // Otherwise, emit with .set (aka assignment).
     MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
     OutStreamer.EmitAssignment(SetLabel, Diff);
-    OutStreamer.EmitSymbolValue(SetLabel, 4);
+    OutStreamer.EmitSymbolValue(SetLabel, Size);
   }
 }
 
@@ -1398,8 +1452,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
 /// where the size in bytes of the directive is specified by Size and Label
 /// specifies the label. This implicitly uses .set if it is available.
 void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
-                                     unsigned Size)
+                                     unsigned Size, bool IsSectionRelative)
   const {
+  if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
+    OutStreamer.EmitCOFFSecRel32(Label);
+    return;
+  }
 
   // Emit Label+Offset (or just Label if Offset is zero)
   const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext);
@@ -1447,7 +1505,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
     return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
 
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
-    return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+    return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
 
   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
     return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
@@ -1477,10 +1535,10 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
     report_fatal_error(OS.str());
   }
   case Instruction::GetElementPtr: {
-    const DataLayout &TD = *AP.TM.getDataLayout();
+    const DataLayout &DL = *AP.TM.getDataLayout();
     // Generate a symbolic expression for the byte address
-    APInt OffsetAI(TD.getPointerSizeInBits(), 0);
-    cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
+    APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
+    cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
 
     const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
     if (!OffsetAI)
@@ -1501,17 +1559,17 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
     return lowerConstant(CE->getOperand(0), AP);
 
   case Instruction::IntToPtr: {
-    const DataLayout &TD = *AP.TM.getDataLayout();
+    const DataLayout &DL = *AP.TM.getDataLayout();
     // Handle casts to pointers by changing them into casts to the appropriate
     // integer type. This promotes constant folding and simplifies this code.
     Constant *Op = CE->getOperand(0);
-    Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+    Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
                                       false/*ZExt*/);
     return lowerConstant(Op, AP);
   }
 
   case Instruction::PtrToInt: {
-    const DataLayout &TD = *AP.TM.getDataLayout();
+    const DataLayout &DL = *AP.TM.getDataLayout();
     // Support only foldable casts to/from pointers that can be eliminated by
     // changing the pointer to the appropriately sized integer type.
     Constant *Op = CE->getOperand(0);
@@ -1521,13 +1579,13 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
 
     // We can emit the pointer value into this slot if the slot is an
     // integer slot equal to the size of the pointer.
-    if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+    if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
       return OpExpr;
 
     // Otherwise the pointer is smaller than the resultant integer, mask off
     // the high bits so we are sure to get a proper truncation if the input is
     // a constant expr.
-    unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+    unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
     const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
     return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
   }
@@ -1561,8 +1619,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
   }
 }
 
-static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
-                                   AsmPrinter &AP);
+static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP);
 
 /// isRepeatedByteSequence - Determine whether the given value is
 /// composed of a repeated sequence of identical bytes and return the
@@ -1624,7 +1681,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
 }
 
 static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
-                                             unsigned AddrSpace,AsmPrinter &AP){
+                                             AsmPrinter &AP){
 
   // See if we can aggregate this into a .fill, if so, emit it as such.
   int Value = isRepeatedByteSequence(CDS, AP.TM);
@@ -1632,12 +1689,12 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
     uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType());
     // Don't emit a 1-byte object as a .fill.
     if (Bytes > 1)
-      return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+      return AP.OutStreamer.EmitFill(Bytes, Value);
   }
 
   // If this can be emitted with .ascii/.asciz, emit it as such.
   if (CDS->isString())
-    return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace);
+    return AP.OutStreamer.EmitBytes(CDS->getAsString());
 
   // Otherwise, emit the values in successive locations.
   unsigned ElementByteSize = CDS->getElementByteSize();
@@ -1647,7 +1704,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
         AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
                                                 CDS->getElementAsInteger(i));
       AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i),
-                                  ElementByteSize, AddrSpace);
+                                  ElementByteSize);
     }
   } else if (ElementByteSize == 4) {
     // FP Constants are printed as integer constants to avoid losing
@@ -1662,7 +1719,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
       F = CDS->getElementAsFloat(i);
       if (AP.isVerbose())
         AP.OutStreamer.GetCommentOS() << "float " << F << '\n';
-      AP.OutStreamer.EmitIntValue(I, 4, AddrSpace);
+      AP.OutStreamer.EmitIntValue(I, 4);
    }
  } else {
    assert(CDS->getElementType()->isDoubleTy());
@@ -1675,78 +1732,74 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
       F = CDS->getElementAsDouble(i);
       if (AP.isVerbose())
         AP.OutStreamer.GetCommentOS() << "double " << F << '\n';
-      AP.OutStreamer.EmitIntValue(I, 8, AddrSpace);
+      AP.OutStreamer.EmitIntValue(I, 8);
     }
   }
 
-  const DataLayout &TD = *AP.TM.getDataLayout();
-  unsigned Size = TD.getTypeAllocSize(CDS->getType());
-  unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) *
+  const DataLayout &DL = *AP.TM.getDataLayout();
+  unsigned Size = DL.getTypeAllocSize(CDS->getType());
+  unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
                          CDS->getNumElements();
   if (unsigned Padding = Size - EmittedSize)
-    AP.OutStreamer.EmitZeros(Padding, AddrSpace);
+    AP.OutStreamer.EmitZeros(Padding);
 }
 
-static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
-                                    AsmPrinter &AP) {
+static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) {
   // See if we can aggregate some values. Make sure it can be
   // represented as a series of bytes of the constant value.
   int Value = isRepeatedByteSequence(CA, AP.TM);
 
   if (Value != -1) {
     uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType());
-    AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+    AP.OutStreamer.EmitFill(Bytes, Value);
   }
   else {
     for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
-      emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+      emitGlobalConstantImpl(CA->getOperand(i), AP);
   }
 }
 
-static void emitGlobalConstantVector(const ConstantVector *CV,
-                                     unsigned AddrSpace, AsmPrinter &AP) {
+static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
   for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
-    emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+    emitGlobalConstantImpl(CV->getOperand(i), AP);
 
-  const DataLayout &TD = *AP.TM.getDataLayout();
-  unsigned Size = TD.getTypeAllocSize(CV->getType());
-  unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+  const DataLayout &DL = *AP.TM.getDataLayout();
+  unsigned Size = DL.getTypeAllocSize(CV->getType());
+  unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
                          CV->getType()->getNumElements();
   if (unsigned Padding = Size - EmittedSize)
-    AP.OutStreamer.EmitZeros(Padding, AddrSpace);
+    AP.OutStreamer.EmitZeros(Padding);
 }
 
-static void emitGlobalConstantStruct(const ConstantStruct *CS,
-                                     unsigned AddrSpace, AsmPrinter &AP) {
+static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
   // Print the fields in successive locations. Pad to align if needed!
-  const DataLayout *TD = AP.TM.getDataLayout();
-  unsigned Size = TD->getTypeAllocSize(CS->getType());
-  const StructLayout *Layout = TD->getStructLayout(CS->getType());
+  const DataLayout *DL = AP.TM.getDataLayout();
+  unsigned Size = DL->getTypeAllocSize(CS->getType());
+  const StructLayout *Layout = DL->getStructLayout(CS->getType());
   uint64_t SizeSoFar = 0;
   for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
     const Constant *Field = CS->getOperand(i);
 
     // Check if padding is needed and insert one or more 0s.
-    uint64_t FieldSize = TD->getTypeAllocSize(Field->getType());
+    uint64_t FieldSize = DL->getTypeAllocSize(Field->getType());
     uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
                         - Layout->getElementOffset(i)) - FieldSize;
     SizeSoFar += FieldSize + PadSize;
 
     // Now print the actual field value.
-    emitGlobalConstantImpl(Field, AddrSpace, AP);
+    emitGlobalConstantImpl(Field, AP);
 
     // Insert padding - this may include padding to increase the size of the
     // current field up to the ABI size (if the struct is not packed) as well
     // as padding to ensure that the next field starts at the right offset.
-    AP.OutStreamer.EmitZeros(PadSize, AddrSpace);
+    AP.OutStreamer.EmitZeros(PadSize);
   }
 
   assert(SizeSoFar == Layout->getSizeInBytes() &&
          "Layout of constant struct may be incorrect!");
 }
 
-static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
-                                 AsmPrinter &AP) {
+static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
   APInt API = CFP->getValueAPF().bitcastToAPInt();
 
   // First print a comment with what we think the original floating-point value
@@ -1772,47 +1825,86 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
     int Chunk = API.getNumWords() - 1;
 
     if (TrailingBytes)
-      AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace);
+      AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes);
 
     for (; Chunk >= 0; --Chunk)
-      AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
+      AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t));
   } else {
     unsigned Chunk;
     for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
-      AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
+      AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t));
 
     if (TrailingBytes)
-      AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace);
+      AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes);
   }
 
   // Emit the tail padding for the long double.
-  const DataLayout &TD = *AP.TM.getDataLayout();
-  AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
-                           TD.getTypeStoreSize(CFP->getType()), AddrSpace);
+  const DataLayout &DL = *AP.TM.getDataLayout();
+  AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
+                           DL.getTypeStoreSize(CFP->getType()));
 }
 
-static void emitGlobalConstantLargeInt(const ConstantInt *CI,
-                                       unsigned AddrSpace, AsmPrinter &AP) {
-  const DataLayout *TD = AP.TM.getDataLayout();
+static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
+  const DataLayout *DL = AP.TM.getDataLayout();
   unsigned BitWidth = CI->getBitWidth();
-  assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
+
+  // Copy the value as we may massage the layout for constants whose bit width
+  // is not a multiple of 64-bits.
+  APInt Realigned(CI->getValue());
+  uint64_t ExtraBits = 0;
+  unsigned ExtraBitsSize = BitWidth & 63;
+
+  if (ExtraBitsSize) {
+    // The bit width of the data is not a multiple of 64-bits.
+    // The extra bits are expected to be at the end of the chunk of the memory.
+    // Little endian:
+    // * Nothing to be done, just record the extra bits to emit.
+    // Big endian:
+    // * Record the extra bits to emit.
+    // * Realign the raw data to emit the chunks of 64-bits.
+    if (DL->isBigEndian()) {
+      // Basically the structure of the raw data is a chunk of 64-bits cells:
+      //       0        1          BitWidth / 64
+      // [chunk1][chunk2] ... [chunkN].
+      // The most significant chunk is chunkN and it should be emitted first.
+      // However, due to the alignment issue chunkN contains useless bits.
+      // Realign the chunks so that they contain only useless information:
+      // ExtraBits     0       1       (BitWidth / 64) - 1
+      //       chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN]
+      ExtraBits = Realigned.getRawData()[0] &
+                  (((uint64_t)-1) >> (64 - ExtraBitsSize));
+      Realigned = Realigned.lshr(ExtraBitsSize);
+    } else
+      ExtraBits = Realigned.getRawData()[BitWidth / 64];
+  }
 
   // We don't expect assemblers to support integer data directives
   // for more than 64 bits, so we emit the data in at most 64-bit
   // quantities at a time.
- const uint64_t *RawData = CI->getValue().getRawData(); + const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { - uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i]; - AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i]; + AP.OutStreamer.EmitIntValue(Val, 8); + } + + if (ExtraBitsSize) { + // Emit the extra bits after the 64-bit chunks. + + // Emit a directive that fills the expected size. + uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(CI->getType()); + Size -= (BitWidth / 64) * 8; + assert(Size && Size * 8 >= ExtraBitsSize && + (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) + == ExtraBits && "Directive too small for extra bits."); + AP.OutStreamer.EmitIntValue(ExtraBits, Size); } } -static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, - AsmPrinter &AP) { - const DataLayout *TD = AP.TM.getDataLayout(); - uint64_t Size = TD->getTypeAllocSize(CV->getType()); +static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { + const DataLayout *DL = AP.TM.getDataLayout(); + uint64_t Size = DL->getTypeAllocSize(CV->getType()); if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) - return AP.OutStreamer.EmitZeros(Size, AddrSpace); + return AP.OutStreamer.EmitZeros(Size); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { switch (Size) { @@ -1823,64 +1915,64 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", CI->getZExtValue()); - AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); + AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size); return; default: - emitGlobalConstantLargeInt(CI, AddrSpace, AP); + emitGlobalConstantLargeInt(CI, AP); return; } } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) - return emitGlobalConstantFP(CFP, AddrSpace, AP); + return emitGlobalConstantFP(CFP, AP); if (isa<ConstantPointerNull>(CV)) { - AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); + AP.OutStreamer.EmitIntValue(0, Size); return; } if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) - return emitGlobalConstantDataSequential(CDS, AddrSpace, AP); + return emitGlobalConstantDataSequential(CDS, AP); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return emitGlobalConstantArray(CVA, AddrSpace, AP); + return emitGlobalConstantArray(CVA, AP); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return emitGlobalConstantStruct(CVS, AddrSpace, AP); + return emitGlobalConstantStruct(CVS, AP); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of // vectors). if (CE->getOpcode() == Instruction::BitCast) - return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + return emitGlobalConstantImpl(CE->getOperand(0), AP); if (Size > 8) { // If the constant expression's size is greater than 64 bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way.
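// The fold path just below guards with "New && New != CE" so that an
// expression which folds to itself cannot recurse forever. The same guard
// in standalone form (hypothetical template parameters, not the patch's
// API):
template <typename NodeT, typename FoldFn, typename EmitFn>
void emitPossiblyFolded(const NodeT *Node, FoldFn Fold, EmitFn Emit) {
  const NodeT *Folded = Fold(Node); // may return null or Node itself
  if (Folded && Folded != Node)
    Emit(Folded); // emit the simplified constant instead
  else
    Emit(Node);   // no progress; emit the original node
}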
- Constant *New = ConstantFoldConstantExpression(CE, TD); + Constant *New = ConstantFoldConstantExpression(CE, DL); if (New && New != CE) - return emitGlobalConstantImpl(New, AddrSpace, AP); + return emitGlobalConstantImpl(New, AP); } } if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return emitGlobalConstantVector(V, AddrSpace, AP); + return emitGlobalConstantVector(V, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // through the streamer with EmitValue. - AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace); + AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. -void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { +void AsmPrinter::EmitGlobalConstant(const Constant *CV) { uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); if (Size) - emitGlobalConstantImpl(CV, AddrSpace, *this); + emitGlobalConstantImpl(CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. - OutStreamer.EmitIntValue(0, 1, AddrSpace); + OutStreamer.EmitIntValue(0, 1); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 31e42d4..b92f49c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -33,7 +33,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// /// EmitSLEB128 - emit the specified signed leb128 value. -void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { +void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); @@ -41,7 +41,7 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { } /// EmitULEB128 - emit the specified unsigned leb128 value. -void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, +void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); @@ -169,28 +169,27 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, // Dwarf Lowering Routines //===----------------------------------------------------------------------===// -/// EmitCFIFrameMove - Emit a frame instruction. -void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // If advancing cfa.
- if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - if (Src.getReg() == MachineLocation::VirtualFP) { - OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); - } else { - // Reg + Offset - OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true), - Src.getOffset()); - } - } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); - } else { - assert(!Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), - Dst.getOffset()); +void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { + switch (Inst.getOperation()) { + default: + llvm_unreachable("Unexpected instruction"); + case MCCFIInstruction::OpDefCfaOffset: + OutStreamer.EmitCFIDefCfaOffset(Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfa: + OutStreamer.EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfaRegister: + OutStreamer.EmitCFIDefCfaRegister(Inst.getRegister()); + break; + case MCCFIInstruction::OpOffset: + OutStreamer.EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); + break; + case MCCFIInstruction::OpRegister: + OutStreamer.EmitCFIRegister(Inst.getRegister(), Inst.getRegister2()); + break; + case MCCFIInstruction::OpWindowSave: + OutStreamer.EmitCFIWindowSave(); + break; } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index abfa330..4f927f6 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -123,7 +123,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, TM.getTargetCPU(), TM.getTargetFeatureString())); OwningPtr<MCTargetAsmParser> - TAP(TM.getTarget().createMCAsmParser(*STI, *Parser)); + TAP(TM.getTarget().createMCAsmParser(*STI, *Parser, *MII)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -213,7 +213,7 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } else { unsigned OpFlags = MI->getOperand(OpNo).getImm(); ++OpNo; // Skip over the ID number. - + if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, /*Modifier*/ 0, OS); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 673867a..e39b374 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "DIE.h" +#include "DwarfDebug.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DataLayout.h" @@ -23,6 +24,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MD5.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -32,8 +34,10 @@ using namespace llvm; /// Profile - Used to gather unique data for the abbreviation folding set. /// void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(Attribute); - ID.AddInteger(Form); + // Explicitly cast to an integer type for which FoldingSetNodeID has + // overloads. 
Otherwise MSVC 2010 thinks this call is ambiguous. + ID.AddInteger(unsigned(Attribute)); + ID.AddInteger(unsigned(Form)); } //===----------------------------------------------------------------------===// @@ -43,7 +47,7 @@ void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { /// Profile - Used to gather unique data for the abbreviation folding set. /// void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(Tag); + ID.AddInteger(unsigned(Tag)); ID.AddInteger(ChildrenFlag); // For each attribute description. @@ -55,11 +59,9 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { /// void DIEAbbrev::Emit(AsmPrinter *AP) const { // Emit its Dwarf tag type. - // FIXME: Doing work even in non-asm-verbose runs. AP->EmitULEB128(Tag, dwarf::TagString(Tag)); // Emit whether it has children DIEs. - // FIXME: Doing work even in non-asm-verbose runs. AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag)); // For each attribute description. @@ -67,12 +69,10 @@ void DIEAbbrev::Emit(AsmPrinter *AP) const { const DIEAbbrevData &AttrData = Data[i]; // Emit attribute type. - // FIXME: Doing work even in non-asm-verbose runs. AP->EmitULEB128(AttrData.getAttribute(), dwarf::AttributeString(AttrData.getAttribute())); // Emit form type. - // FIXME: Doing work even in non-asm-verbose runs. AP->EmitULEB128(AttrData.getForm(), dwarf::FormEncodingString(AttrData.getForm())); } @@ -114,14 +114,34 @@ DIE::~DIE() { /// Climb up the parent chain to get the compile unit DIE to which this DIE /// belongs. -DIE *DIE::getCompileUnit() const { - DIE *p = getParent(); +const DIE *DIE::getCompileUnit() const { + const DIE *Cu = getCompileUnitOrNull(); + assert(Cu && "We should not have orphaned DIEs."); + return Cu; +} + +/// Climb up the parent chain to get the compile unit DIE this DIE belongs +/// to. Return NULL if DIE is not added to an owner yet. +const DIE *DIE::getCompileUnitOrNull() const { + const DIE *p = this; while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit) return p; p = p->getParent(); } - llvm_unreachable("We should not have orphaned DIEs."); + return NULL; +} + +DIEValue *DIE::findAttribute(uint16_t Attribute) { + const SmallVectorImpl<DIEValue *> &Values = getValues(); + const DIEAbbrev &Abbrevs = getAbbrev(); + + // Iterate through all the attributes until we find the one we're + // looking for, if we can't find it return NULL. + for (size_t i = 0; i < Values.size(); ++i) + if (Abbrevs.getData()[i].getAttribute() == Attribute) + return Values[i]; + return NULL; } #ifndef NDEBUG @@ -178,7 +198,7 @@ void DIE::dump() { void DIEValue::anchor() { } #ifndef NDEBUG -void DIEValue::dump() { +void DIEValue::dump() const { print(dbgs()); } #endif @@ -189,14 +209,14 @@ void DIEValue::dump() { /// EmitValue - Emit integer of appropriate size. /// -void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { +void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { unsigned Size = ~0U; switch (Form) { case dwarf::DW_FORM_flag_present: // Emit something to keep the lines and comments in sync. // FIXME: Is there a better way to do this? if (Asm->OutStreamer.hasRawTextSupport()) - Asm->OutStreamer.EmitRawText(StringRef("")); + Asm->OutStreamer.EmitRawText(""); return; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru @@ -221,7 +241,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { /// SizeOf - Determine size of integer value in bytes. 
/// -unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_flag_present: return 0; case dwarf::DW_FORM_flag: // Fall thru @@ -244,25 +264,54 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { } #ifndef NDEBUG -void DIEInteger::print(raw_ostream &O) { +void DIEInteger::print(raw_ostream &O) const { O << "Int: " << (int64_t)Integer << " 0x"; O.write_hex(Integer); } #endif //===----------------------------------------------------------------------===// +// DIEExpr Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit expression value. +/// +void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { + AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of expression value in bytes. +/// +unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->getDataLayout().getPointerSize(); +} + +#ifndef NDEBUG +void DIEExpr::print(raw_ostream &O) const { + O << "Expr: "; + Expr->print(O); +} +#endif + +//===----------------------------------------------------------------------===// // DIELabel Implementation //===----------------------------------------------------------------------===// /// EmitValue - Emit label value. /// -void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { - AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form)); +void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { + AP->EmitLabelReference(Label, SizeOf(AP, Form), + Form == dwarf::DW_FORM_strp || + Form == dwarf::DW_FORM_sec_offset || + Form == dwarf::DW_FORM_ref_addr); } /// SizeOf - Determine size of label value in bytes. /// -unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -270,7 +319,7 @@ unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { } #ifndef NDEBUG -void DIELabel::print(raw_ostream &O) { +void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); } #endif @@ -281,36 +330,70 @@ void DIELabel::print(raw_ostream &O) { /// EmitValue - Emit delta value. /// -void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { +void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); } /// SizeOf - Determine size of delta value in bytes. /// -unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; return AP->getDataLayout().getPointerSize(); } #ifndef NDEBUG -void DIEDelta::print(raw_ostream &O) { +void DIEDelta::print(raw_ostream &O) const { O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName(); } #endif //===----------------------------------------------------------------------===// +// DIEString Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit string value. 
+/// +void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { + Access->EmitValue(AP, Form); +} + +/// SizeOf - Determine size of string value in bytes. +/// +unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { + return Access->SizeOf(AP, Form); +} + +#ifndef NDEBUG +void DIEString::print(raw_ostream &O) const { + O << "String: " << Str << "\tSymbol: "; + Access->print(O); +} +#endif + +//===----------------------------------------------------------------------===// // DIEEntry Implementation //===----------------------------------------------------------------------===// /// EmitValue - Emit debug information entry offset. /// -void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const { +void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->EmitInt32(Entry->getOffset()); } +unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { + // DWARF4: References that use the attribute form DW_FORM_ref_addr are + // specified to be four bytes in the DWARF 32-bit format and eight bytes + // in the DWARF 64-bit format, while DWARF Version 2 specifies that such + // references have the same size as an address on the target system. + if (AP->getDwarfDebug()->getDwarfVersion() == 2) + return AP->getDataLayout().getPointerSize(); + return sizeof(int32_t); +} + #ifndef NDEBUG -void DIEEntry::print(raw_ostream &O) { +void DIEEntry::print(raw_ostream &O) const { O << format("Die: 0x%lx", (long)(intptr_t)Entry); } #endif @@ -333,7 +416,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { /// EmitValue - Emit block data. /// -void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { +void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; @@ -349,7 +432,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { /// SizeOf - Determine size of block data in bytes. /// -unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -360,7 +443,7 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const { } #ifndef NDEBUG -void DIEBlock::print(raw_ostream &O) { +void DIEBlock::print(raw_ostream &O) const { O << "Blk: "; DIE::print(O, 5); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h index 3c06001..f4fa326 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h @@ -18,30 +18,32 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Dwarf.h" +#include "llvm/MC/MCExpr.h" #include <vector> namespace llvm { class AsmPrinter; class MCSymbol; + class MCSymbolRefExpr; class raw_ostream; //===--------------------------------------------------------------------===// - /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a + /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a /// Dwarf abbreviation. class DIEAbbrevData { /// Attribute - Dwarf attribute code. /// - uint16_t Attribute; + dwarf::Attribute Attribute; /// Form - Dwarf form code.
/// - uint16_t Form; + dwarf::Form Form; public: - DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {} + DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {} // Accessors. - uint16_t getAttribute() const { return Attribute; } - uint16_t getForm() const { return Form; } + dwarf::Attribute getAttribute() const { return Attribute; } + dwarf::Form getForm() const { return Form; } /// Profile - Used to gather unique data for the abbreviation folding set. /// @@ -54,7 +56,7 @@ namespace llvm { class DIEAbbrev : public FoldingSetNode { /// Tag - Dwarf tag code. /// - uint16_t Tag; + dwarf::Tag Tag; /// ChildrenFlag - Dwarf children flag. /// @@ -69,29 +71,22 @@ namespace llvm { SmallVector<DIEAbbrevData, 12> Data; public: - DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} + DIEAbbrev(dwarf::Tag T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} // Accessors. - uint16_t getTag() const { return Tag; } + dwarf::Tag getTag() const { return Tag; } unsigned getNumber() const { return Number; } uint16_t getChildrenFlag() const { return ChildrenFlag; } const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; } - void setTag(uint16_t T) { Tag = T; } void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } void setNumber(unsigned N) { Number = N; } /// AddAttribute - Adds another set of attribute information to the /// abbreviation. - void AddAttribute(uint16_t Attribute, uint16_t Form) { + void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) { Data.push_back(DIEAbbrevData(Attribute, Form)); } - /// AddFirstAttribute - Adds a set of attribute information to the front - /// of the abbreviation. - void AddFirstAttribute(uint16_t Attribute, uint16_t Form) { - Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form)); - } - /// Profile - Used to gather unique data for the abbreviation folding set. /// void Profile(FoldingSetNodeID &ID) const; @@ -135,17 +130,17 @@ namespace llvm { /// SmallVector<DIEValue*, 12> Values; - // Private data for print() - mutable unsigned IndentCount; public: explicit DIE(unsigned Tag) - : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0) {} + : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no), + Parent(0) {} virtual ~DIE(); // Accessors. DIEAbbrev &getAbbrev() { return Abbrev; } + const DIEAbbrev &getAbbrev() const { return Abbrev; } unsigned getAbbrevNumber() const { return Abbrev.getNumber(); } - unsigned getTag() const { return Abbrev.getTag(); } + dwarf::Tag getTag() const { return Abbrev.getTag(); } unsigned getOffset() const { return Offset; } unsigned getSize() const { return Size; } const std::vector<DIE *> &getChildren() const { return Children; } @@ -153,14 +148,17 @@ namespace llvm { DIE *getParent() const { return Parent; } /// Climb up the parent chain to get the compile unit DIE this DIE belongs /// to. - DIE *getCompileUnit() const; - void setTag(unsigned Tag) { Abbrev.setTag(Tag); } + const DIE *getCompileUnit() const; + /// Similar to getCompileUnit, returns null when DIE is not added to an + /// owner yet. + const DIE *getCompileUnitOrNull() const; void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } /// addValue - Add a value and attributes to a DIE. 
/// - void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) { + void addValue(dwarf::Attribute Attribute, dwarf::Form Form, + DIEValue *Value) { Abbrev.AddAttribute(Attribute, Form); Values.push_back(Value); } @@ -168,15 +166,16 @@ namespace llvm { /// addChild - Add a child to the DIE. /// void addChild(DIE *Child) { - if (Child->getParent()) { - assert (Child->getParent() == this && "Unexpected DIE Parent!"); - return; - } + assert(!Child->getParent()); Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); Children.push_back(Child); Child->Parent = this; } + /// findAttribute - Find a value in the DIE with the attribute given, returns NULL + /// if no such attribute exists. + DIEValue *findAttribute(uint16_t Attribute); + #ifndef NDEBUG void print(raw_ostream &O, unsigned IndentCount = 0) const; void dump(); @@ -192,6 +191,7 @@ namespace llvm { enum { isInteger, isString, + isExpr, isLabel, isDelta, isEntry, @@ -210,15 +210,15 @@ namespace llvm { /// EmitValue - Emit value via the Dwarf writer. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0; /// SizeOf - Return the size of a value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0; #ifndef NDEBUG - virtual void print(raw_ostream &O) = 0; - void dump(); + virtual void print(raw_ostream &O) const = 0; + void dump() const; #endif }; @@ -232,7 +232,7 @@ namespace llvm { /// BestForm - Choose the best form for integer. /// - static unsigned BestForm(bool IsSigned, uint64_t Int) { + static dwarf::Form BestForm(bool IsSigned, uint64_t Int) { if (IsSigned) { const int64_t SignedInt = Int; if ((char)Int == SignedInt) return dwarf::DW_FORM_data1; @@ -248,24 +248,52 @@ namespace llvm { /// EmitValue - Emit integer of appropriate size. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; uint64_t getValue() const { return Integer; } /// SizeOf - Determine size of integer value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *I) { return I->getType() == isInteger; } #ifndef NDEBUG - virtual void print(raw_ostream &O); + virtual void print(raw_ostream &O) const; #endif }; //===--------------------------------------------------------------------===// - /// DIELabel - A label expression DIE. + /// DIEExpr - An expression DIE. + // + class DIEExpr : public DIEValue { + const MCExpr *Expr; + public: + explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {} + + /// EmitValue - Emit expression value. + /// + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + + /// getValue - Get MCExpr. + /// + const MCExpr *getValue() const { return Expr; } + + /// SizeOf - Determine size of expression value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isExpr; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O) const; +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIELabel - A label DIE. // class DIELabel : public DIEValue { const MCSymbol *Label; @@ -274,21 +302,21 @@ namespace llvm { /// EmitValue - Emit label value. 
/// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// getValue - Get MCSymbol. /// - const MCSymbol *getValue() const { return Label; } + const MCSymbol *getValue() const { return Label; } /// SizeOf - Determine size of label value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *L) { return L->getType() == isLabel; } #ifndef NDEBUG - virtual void print(raw_ostream &O); + virtual void print(raw_ostream &O) const; #endif }; @@ -304,46 +332,82 @@ namespace llvm { /// EmitValue - Emit delta value. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// SizeOf - Determine size of delta value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *D) { return D->getType() == isDelta; } #ifndef NDEBUG - virtual void print(raw_ostream &O); + virtual void print(raw_ostream &O) const; #endif }; //===--------------------------------------------------------------------===// + /// DIEString - A container for string values. + /// + class DIEString : public DIEValue { + const DIEValue *Access; + const StringRef Str; + + public: + DIEString(const DIEValue *Acc, const StringRef S) + : DIEValue(isString), Access(Acc), Str(S) {} + + /// getString - Grab the string out of the object. + StringRef getString() const { return Str; } + + /// EmitValue - Emit string value. + /// + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + + /// SizeOf - Determine size of string value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *D) { return D->getType() == isString; } + + #ifndef NDEBUG + virtual void print(raw_ostream &O) const; + #endif + }; + + //===--------------------------------------------------------------------===// /// DIEEntry - A pointer to another debug information entry. An instance of /// this class can also be used as a proxy for a debug information entry not /// yet defined (ie. types.) class DIEEntry : public DIEValue { DIE *const Entry; public: - explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {} + explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) { + assert(E && "Cannot construct a DIEEntry with a null DIE"); + } DIE *getEntry() const { return Entry; } /// EmitValue - Emit debug information entry offset. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// SizeOf - Determine size of debug information entry in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const { - return sizeof(int32_t); + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const { + return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) + : sizeof(int32_t); } + /// Returns size of a ref_addr entry. + static unsigned getRefAddrSize(AsmPrinter *AP); + // Implement isa/cast/dyncast.
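// A condensed sketch (hypothetical names, no LLVM headers) of the RTTI
// pattern behind these classof methods: the base class stores a kind tag,
// each subclass's classof checks it, and llvm::isa<>/llvm::dyn_cast<> from
// llvm/Support/Casting.h dispatch on that check instead of C++ RTTI.
struct ValueBase {
  enum Kind { KindInteger, KindEntry } TheKind;
  explicit ValueBase(Kind K) : TheKind(K) {}
  Kind getType() const { return TheKind; }
};
struct EntryValue : ValueBase {
  EntryValue() : ValueBase(KindEntry) {}
  static bool classof(const ValueBase *V) {
    return V->getType() == KindEntry;
  }
};
// DIEEntry's own classof, which enables those casts, follows: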
static bool classof(const DIEValue *E) { return E->getType() == isEntry; } #ifndef NDEBUG - virtual void print(raw_ostream &O); + virtual void print(raw_ostream &O) const; #endif }; @@ -353,9 +417,7 @@ namespace llvm { class DIEBlock : public DIEValue, public DIE { unsigned Size; // Size in bytes excluding size header. public: - DIEBlock() - : DIEValue(isBlock), DIE(0), Size(0) {} - virtual ~DIEBlock() {} + DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {} /// ComputeSize - calculate the size of the block. /// @@ -363,7 +425,7 @@ namespace llvm { /// BestForm - Choose the best form for data. /// - unsigned BestForm() const { + dwarf::Form BestForm() const { if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1; if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2; if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4; @@ -372,17 +434,17 @@ namespace llvm { /// EmitValue - Emit block data. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// SizeOf - Determine size of block data in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *E) { return E->getType() == isBlock; } #ifndef NDEBUG - virtual void print(raw_ostream &O); + virtual void print(raw_ostream &O) const; #endif }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp new file mode 100644 index 0000000..95eca90 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -0,0 +1,507 @@ +//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for DWARF4 hashing of DIEs. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfdebug" + +#include "DIEHash.h" + +#include "DIE.h" +#include "DwarfCompileUnit.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +/// \brief Grabs the string in whichever attribute is passed in and returns +/// a reference to it. +static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) { + const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); + const DIEAbbrev &Abbrevs = Die.getAbbrev(); + + // Iterate through all the attributes until we find the one we're + // looking for, if we can't find it return an empty string. + for (size_t i = 0; i < Values.size(); ++i) { + if (Abbrevs.getData()[i].getAttribute() == Attr) { + DIEValue *V = Values[i]; + assert(isa<DIEString>(V) && "String requested. Not a string."); + DIEString *S = cast<DIEString>(V); + return S->getString(); + } + } + return StringRef(""); +} + +/// \brief Adds the string in \p Str to the hash. This also hashes +/// a trailing NULL with the string. 
+void DIEHash::addString(StringRef Str) { + DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); + Hash.update(Str); + Hash.update(makeArrayRef((uint8_t)'\0')); +} + +// FIXME: The LEB128 routines are copied and only slightly modified out of +// LEB128.h. + +/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128. +void DIEHash::addULEB128(uint64_t Value) { + DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + if (Value != 0) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + Hash.update(Byte); + } while (Value != 0); +} + +void DIEHash::addSLEB128(int64_t Value) { + DEBUG(dbgs() << "Adding SLEB128 " << Value << " to hash.\n"); + bool More; + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + More = !((((Value == 0) && ((Byte & 0x40) == 0)) || + ((Value == -1) && ((Byte & 0x40) != 0)))); + if (More) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + Hash.update(Byte); + } while (More); +} + +/// \brief Including \p Parent adds the context of Parent to the hash. +void DIEHash::addParentContext(const DIE &Parent) { + + DEBUG(dbgs() << "Adding parent context to hash...\n"); + + // [7.27.2] For each surrounding type or namespace beginning with the + // outermost such construct... + SmallVector<const DIE *, 1> Parents; + const DIE *Cur = &Parent; + while (Cur->getTag() != dwarf::DW_TAG_compile_unit) { + Parents.push_back(Cur); + Cur = Cur->getParent(); + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(), + E = Parents.rend(); + I != E; ++I) { + const DIE &Die = **I; + + // ... Append the letter "C" to the sequence... + addULEB128('C'); + + // ... Followed by the DWARF tag of the construct... + addULEB128(Die.getTag()); + + // ... Then the name, taken from the DW_AT_name attribute. + StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); + DEBUG(dbgs() << "... adding context: " << Name << "\n"); + if (!Name.empty()) + addString(Name); + } +} +
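// Worked byte-level examples for the addULEB128/addSLEB128 routines above,
// using the classic values from the DWARF specification:
//
//   ULEB128(624485), i.e. 0x98765:
//     low 7 bits 0x65, continuation set   -> 0xE5
//     next 7 bits 0x0E, continuation set  -> 0x8E
//     final 7 bits 0x26, no continuation  -> 0x26
//     encoded stream: e5 8e 26
//
//   SLEB128(-2):
//     -2 & 0x7f = 0x7e; after the arithmetic shift Value is -1 and the
//     byte's sign bit (0x40) is set, so no continuation byte is needed
//     encoded stream: 7e

+// Collect all of the attributes for a particular DIE in a single structure.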
+void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { + const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); + const DIEAbbrev &Abbrevs = Die.getAbbrev(); + +#define COLLECT_ATTR(NAME) \ + case dwarf::NAME: \ + Attrs.NAME.Val = Values[i]; \ + Attrs.NAME.Desc = &Abbrevs.getData()[i]; \ + break + + for (size_t i = 0, e = Values.size(); i != e; ++i) { + DEBUG(dbgs() << "Attribute: " + << dwarf::AttributeString(Abbrevs.getData()[i].getAttribute()) + << " added.\n"); + switch (Abbrevs.getData()[i].getAttribute()) { + COLLECT_ATTR(DW_AT_name); + COLLECT_ATTR(DW_AT_accessibility); + COLLECT_ATTR(DW_AT_address_class); + COLLECT_ATTR(DW_AT_allocated); + COLLECT_ATTR(DW_AT_artificial); + COLLECT_ATTR(DW_AT_associated); + COLLECT_ATTR(DW_AT_binary_scale); + COLLECT_ATTR(DW_AT_bit_offset); + COLLECT_ATTR(DW_AT_bit_size); + COLLECT_ATTR(DW_AT_bit_stride); + COLLECT_ATTR(DW_AT_byte_size); + COLLECT_ATTR(DW_AT_byte_stride); + COLLECT_ATTR(DW_AT_const_expr); + COLLECT_ATTR(DW_AT_const_value); + COLLECT_ATTR(DW_AT_containing_type); + COLLECT_ATTR(DW_AT_count); + COLLECT_ATTR(DW_AT_data_bit_offset); + COLLECT_ATTR(DW_AT_data_location); + COLLECT_ATTR(DW_AT_data_member_location); + COLLECT_ATTR(DW_AT_decimal_scale); + COLLECT_ATTR(DW_AT_decimal_sign); + COLLECT_ATTR(DW_AT_default_value); + COLLECT_ATTR(DW_AT_digit_count); + COLLECT_ATTR(DW_AT_discr); + COLLECT_ATTR(DW_AT_discr_list); + COLLECT_ATTR(DW_AT_discr_value); + COLLECT_ATTR(DW_AT_encoding); + COLLECT_ATTR(DW_AT_enum_class); + COLLECT_ATTR(DW_AT_endianity); + COLLECT_ATTR(DW_AT_explicit); + COLLECT_ATTR(DW_AT_is_optional); + COLLECT_ATTR(DW_AT_location); + COLLECT_ATTR(DW_AT_lower_bound); + COLLECT_ATTR(DW_AT_mutable); + COLLECT_ATTR(DW_AT_ordering); + COLLECT_ATTR(DW_AT_picture_string); + COLLECT_ATTR(DW_AT_prototyped); + COLLECT_ATTR(DW_AT_small); + COLLECT_ATTR(DW_AT_segment); + COLLECT_ATTR(DW_AT_string_length); + COLLECT_ATTR(DW_AT_threads_scaled); + COLLECT_ATTR(DW_AT_upper_bound); + COLLECT_ATTR(DW_AT_use_location); + COLLECT_ATTR(DW_AT_use_UTF8); + COLLECT_ATTR(DW_AT_variable_parameter); + COLLECT_ATTR(DW_AT_virtuality); + COLLECT_ATTR(DW_AT_visibility); + COLLECT_ATTR(DW_AT_vtable_elem_location); + COLLECT_ATTR(DW_AT_type); + default: + break; + } + } +} + +void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute, + const DIE &Entry, StringRef Name) { + // append the letter 'N' + addULEB128('N'); + + // the DWARF attribute code (DW_AT_type or DW_AT_friend), + addULEB128(Attribute); + + // the context of the tag, + if (const DIE *Parent = Entry.getParent()) + addParentContext(*Parent); + + // the letter 'E', + addULEB128('E'); + + // and the name of the type. + addString(Name); + + // Currently DW_TAG_friends are not used by Clang, but if they do become so, + // here's the relevant spec text to implement: + // + // For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram, + // the context is omitted and the name to be used is the ABI-specific name + // of the subprogram (e.g., the mangled linker name). 
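// Illustrative trace (assumed example, following the steps quoted above):
// a reference to "struct foo" nested inside "namespace bar" would append
//   'N', DW_AT_type, 'C', DW_TAG_namespace, "bar", 'E', "foo"
// where each letter and tag is ULEB128-encoded and each string is hashed
// with its trailing NUL.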
+} + +void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute, + unsigned DieNumber) { + // a) If T is in the list of [previously hashed types], use the letter + // 'R' as the marker + addULEB128('R'); + + addULEB128(Attribute); + + // and use the unsigned LEB128 encoding of [the index of T in the + // list] as the attribute value; + addULEB128(DieNumber); +} + +void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, + const DIE &Entry) { + assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend " + "tags. Add support here when there's " + "a use case"); + // Step 5 + // If the tag in Step 3 is one of [the below tags] + if ((Tag == dwarf::DW_TAG_pointer_type || + Tag == dwarf::DW_TAG_reference_type || + Tag == dwarf::DW_TAG_rvalue_reference_type || + Tag == dwarf::DW_TAG_ptr_to_member_type) && + // and the referenced type (via the [below attributes]) + // FIXME: This seems overly restrictive, and causes hash mismatches when + // there's a decl/def difference in the containing type of a + // ptr_to_member_type, but it's what DWARF says, for some reason. + Attribute == dwarf::DW_AT_type) { + // ... has a DW_AT_name attribute, + StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name); + if (!Name.empty()) { + hashShallowTypeReference(Attribute, Entry, Name); + return; + } + } + + unsigned &DieNumber = Numbering[&Entry]; + if (DieNumber) { + hashRepeatedTypeReference(Attribute, DieNumber); + return; + } + + // otherwise, b) use the letter 'T' as the marker, ... + addULEB128('T'); + + addULEB128(Attribute); + + // ... process the type T recursively by performing Steps 2 through 7, and + // use the result as the attribute value. + DieNumber = Numbering.size(); + computeHash(Entry); +} + +// Hash an individual attribute \param Attr based on the type of attribute and +// the form. +void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { + const DIEValue *Value = Attr.Val; + const DIEAbbrevData *Desc = Attr.Desc; + dwarf::Attribute Attribute = Desc->getAttribute(); + + // 7.27 Step 3 + // ... An attribute that refers to another type entry T is processed as + // follows: + if (const DIEEntry *EntryAttr = dyn_cast<DIEEntry>(Value)) { + hashDIEEntry(Attribute, Tag, *EntryAttr->getEntry()); + return; + } + + // Other attribute values use the letter 'A' as the marker, ... + addULEB128('A'); + + addULEB128(Attribute); + + // ... and the value consists of the form code (encoded as an unsigned LEB128 + // value) followed by the encoding of the value according to the form code. To + // ensure reproducibility of the signature, the set of forms used in the + // signature computation is limited to the following: DW_FORM_sdata, + // DW_FORM_flag, DW_FORM_string, and DW_FORM_block. + switch (Desc->getForm()) { + case dwarf::DW_FORM_string: + llvm_unreachable( + "Add support for DW_FORM_string if we ever start emitting them again"); + case dwarf::DW_FORM_GNU_str_index: + case dwarf::DW_FORM_strp: + addULEB128(dwarf::DW_FORM_string); + addString(cast<DIEString>(Value)->getString()); + break; + case dwarf::DW_FORM_data1: + case dwarf::DW_FORM_data2: + case dwarf::DW_FORM_data4: + case dwarf::DW_FORM_data8: + case dwarf::DW_FORM_udata: + addULEB128(dwarf::DW_FORM_sdata); + addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue()); + break; + default: + llvm_unreachable("Add support for additional forms"); + } +} +
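// A recap (sketch only) of the DWARF4 7.27 marker letters used by the
// hashing routines above and below, gathered from their comments:
enum HashMarker : uint8_t {
  MarkerContext   = 'C', // a surrounding type or namespace entry
  MarkerNamedRef  = 'N', // shallow reference to a named type
  MarkerEndOfCtx  = 'E', // terminates the context of a shallow reference
  MarkerRepeated  = 'R', // back-reference to an already-hashed type
  MarkerFullType  = 'T', // hash the referenced type recursively
  MarkerAttribute = 'A', // an ordinary attribute value
  MarkerDIE       = 'D', // start of a DIE; its tag follows
  MarkerNested    = 'S'  // nested type or member function shorthand
};

+// Go through the attributes from \param Attrs in the order specified in 7.27.4 +// and hash them.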
+void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) { +#define ADD_ATTR(ATTR) \ + { \ + if (ATTR.Val != 0) \ + hashAttribute(ATTR, Tag); \ + } + + ADD_ATTR(Attrs.DW_AT_name); + ADD_ATTR(Attrs.DW_AT_accessibility); + ADD_ATTR(Attrs.DW_AT_address_class); + ADD_ATTR(Attrs.DW_AT_allocated); + ADD_ATTR(Attrs.DW_AT_artificial); + ADD_ATTR(Attrs.DW_AT_associated); + ADD_ATTR(Attrs.DW_AT_binary_scale); + ADD_ATTR(Attrs.DW_AT_bit_offset); + ADD_ATTR(Attrs.DW_AT_bit_size); + ADD_ATTR(Attrs.DW_AT_bit_stride); + ADD_ATTR(Attrs.DW_AT_byte_size); + ADD_ATTR(Attrs.DW_AT_byte_stride); + ADD_ATTR(Attrs.DW_AT_const_expr); + ADD_ATTR(Attrs.DW_AT_const_value); + ADD_ATTR(Attrs.DW_AT_containing_type); + ADD_ATTR(Attrs.DW_AT_count); + ADD_ATTR(Attrs.DW_AT_data_bit_offset); + ADD_ATTR(Attrs.DW_AT_data_location); + ADD_ATTR(Attrs.DW_AT_data_member_location); + ADD_ATTR(Attrs.DW_AT_decimal_scale); + ADD_ATTR(Attrs.DW_AT_decimal_sign); + ADD_ATTR(Attrs.DW_AT_default_value); + ADD_ATTR(Attrs.DW_AT_digit_count); + ADD_ATTR(Attrs.DW_AT_discr); + ADD_ATTR(Attrs.DW_AT_discr_list); + ADD_ATTR(Attrs.DW_AT_discr_value); + ADD_ATTR(Attrs.DW_AT_encoding); + ADD_ATTR(Attrs.DW_AT_enum_class); + ADD_ATTR(Attrs.DW_AT_endianity); + ADD_ATTR(Attrs.DW_AT_explicit); + ADD_ATTR(Attrs.DW_AT_is_optional); + ADD_ATTR(Attrs.DW_AT_location); + ADD_ATTR(Attrs.DW_AT_lower_bound); + ADD_ATTR(Attrs.DW_AT_mutable); + ADD_ATTR(Attrs.DW_AT_ordering); + ADD_ATTR(Attrs.DW_AT_picture_string); + ADD_ATTR(Attrs.DW_AT_prototyped); + ADD_ATTR(Attrs.DW_AT_small); + ADD_ATTR(Attrs.DW_AT_segment); + ADD_ATTR(Attrs.DW_AT_string_length); + ADD_ATTR(Attrs.DW_AT_threads_scaled); + ADD_ATTR(Attrs.DW_AT_upper_bound); + ADD_ATTR(Attrs.DW_AT_use_location); + ADD_ATTR(Attrs.DW_AT_use_UTF8); + ADD_ATTR(Attrs.DW_AT_variable_parameter); + ADD_ATTR(Attrs.DW_AT_virtuality); + ADD_ATTR(Attrs.DW_AT_visibility); + ADD_ATTR(Attrs.DW_AT_vtable_elem_location); + ADD_ATTR(Attrs.DW_AT_type); + + // FIXME: Add the extended attributes. +} + +// Add all of the attributes for \param Die to the hash. +void DIEHash::addAttributes(const DIE &Die) { + DIEAttrs Attrs = {}; + collectAttributes(Die, Attrs); + hashAttributes(Attrs, Die.getTag()); +} + +void DIEHash::hashNestedType(const DIE &Die, StringRef Name) { + // 7.27 Step 7 + // ... append the letter 'S', + addULEB128('S'); + + // the tag of C, + addULEB128(Die.getTag()); + + // and the name. + addString(Name); +} + +// Compute the hash of a DIE. This is based on the type signature computation +// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a +// flattened description of the DIE. +void DIEHash::computeHash(const DIE &Die) { + // Append the letter 'D', followed by the DWARF tag of the DIE. + addULEB128('D'); + addULEB128(Die.getTag()); + + // Add each of the attributes of the DIE. + addAttributes(Die); + + // Then hash each of the children of the DIE. + for (std::vector<DIE *>::const_iterator I = Die.getChildren().begin(), + E = Die.getChildren().end(); + I != E; ++I) { + // 7.27 Step 7 + // If C is a nested type entry or a member function entry, ... + if (isType((*I)->getTag()) || (*I)->getTag() == dwarf::DW_TAG_subprogram) { + StringRef Name = getDIEStringAttr(**I, dwarf::DW_AT_name); + // ... and has a DW_AT_name attribute + if (!Name.empty()) { + hashNestedType(**I, Name); + continue; + } + } + computeHash(**I); + } + + // Following the last (or if there are no children), append a zero byte. 
+ Hash.update(makeArrayRef((uint8_t)'\0')); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE +/// with the exception that we are hashing only the context and the name of the +/// type. +uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) { + + // Add the contexts to the hash. We won't be computing the ODR hash for + // function local types so it's safe to use the generic context hashing + // algorithm here. + // FIXME: If we figure out how to account for linkage in some way we could + // actually do this with a slight modification to the parent hash algorithm. + if (const DIE *Parent = Die.getParent()) + addParentContext(*Parent); + + // Add the current DIE information. + + // Add the DWARF tag of the DIE. + addULEB128(Die.getTag()); + + // Add the name of the type to the hash. + addString(getDIEStringAttr(Die, dwarf::DW_AT_name)); + + // Now get the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + return *reinterpret_cast<support::ulittle64_t *>(Result + 8); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE +/// with the inclusion of the full CU and all top level CU entities. +// TODO: Initialize the type chain at 0 instead of 1 for CU signatures. +uint64_t DIEHash::computeCUSignature(const DIE &Die) { + Numbering.clear(); + Numbering[&Die] = 1; + + // Hash the DIE. + computeHash(Die); + + // Now return the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + return *reinterpret_cast<support::ulittle64_t *>(Result + 8); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE +/// with the inclusion of additional forms not specifically called out in the +/// standard. +uint64_t DIEHash::computeTypeSignature(const DIE &Die) { + Numbering.clear(); + Numbering[&Die] = 1; + + if (const DIE *Parent = Die.getParent()) + addParentContext(*Parent); + + // Hash the DIE. + computeHash(Die); + + // Now return the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + return *reinterpret_cast<support::ulittle64_t *>(Result + 8); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h new file mode 100644 index 0000000..f0c4ef9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -0,0 +1,147 @@ +//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for DWARF4 hashing of DIEs. 
+// +//===----------------------------------------------------------------------===// + +#include "DIE.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/MD5.h" + +namespace llvm { + +class CompileUnit; + +/// \brief An object containing the capability of hashing and adding hash +/// attributes onto a DIE. +class DIEHash { + // The entry for a particular attribute. + struct AttrEntry { + const DIEValue *Val; + const DIEAbbrevData *Desc; + }; + + // Collection of all attributes used in hashing a particular DIE. + struct DIEAttrs { + AttrEntry DW_AT_name; + AttrEntry DW_AT_accessibility; + AttrEntry DW_AT_address_class; + AttrEntry DW_AT_allocated; + AttrEntry DW_AT_artificial; + AttrEntry DW_AT_associated; + AttrEntry DW_AT_binary_scale; + AttrEntry DW_AT_bit_offset; + AttrEntry DW_AT_bit_size; + AttrEntry DW_AT_bit_stride; + AttrEntry DW_AT_byte_size; + AttrEntry DW_AT_byte_stride; + AttrEntry DW_AT_const_expr; + AttrEntry DW_AT_const_value; + AttrEntry DW_AT_containing_type; + AttrEntry DW_AT_count; + AttrEntry DW_AT_data_bit_offset; + AttrEntry DW_AT_data_location; + AttrEntry DW_AT_data_member_location; + AttrEntry DW_AT_decimal_scale; + AttrEntry DW_AT_decimal_sign; + AttrEntry DW_AT_default_value; + AttrEntry DW_AT_digit_count; + AttrEntry DW_AT_discr; + AttrEntry DW_AT_discr_list; + AttrEntry DW_AT_discr_value; + AttrEntry DW_AT_encoding; + AttrEntry DW_AT_enum_class; + AttrEntry DW_AT_endianity; + AttrEntry DW_AT_explicit; + AttrEntry DW_AT_is_optional; + AttrEntry DW_AT_location; + AttrEntry DW_AT_lower_bound; + AttrEntry DW_AT_mutable; + AttrEntry DW_AT_ordering; + AttrEntry DW_AT_picture_string; + AttrEntry DW_AT_prototyped; + AttrEntry DW_AT_small; + AttrEntry DW_AT_segment; + AttrEntry DW_AT_string_length; + AttrEntry DW_AT_threads_scaled; + AttrEntry DW_AT_upper_bound; + AttrEntry DW_AT_use_location; + AttrEntry DW_AT_use_UTF8; + AttrEntry DW_AT_variable_parameter; + AttrEntry DW_AT_virtuality; + AttrEntry DW_AT_visibility; + AttrEntry DW_AT_vtable_elem_location; + AttrEntry DW_AT_type; + + // Insert any additional ones here... + }; + +public: + /// \brief Computes the ODR signature. + uint64_t computeDIEODRSignature(const DIE &Die); + + /// \brief Computes the CU signature. + uint64_t computeCUSignature(const DIE &Die); + + /// \brief Computes the type signature. + uint64_t computeTypeSignature(const DIE &Die); + + // Helper routines to process parts of a DIE. +private: + /// \brief Adds the parent context of \param Die to the hash. + void addParentContext(const DIE &Die); + + /// \brief Adds the attributes of \param Die to the hash. + void addAttributes(const DIE &Die); + + /// \brief Computes the full DWARF4 7.27 hash of the DIE. + void computeHash(const DIE &Die); + + // Routines that add DIEValues to the hash. +private: + /// \brief Encodes and adds \param Value to the hash as a ULEB128. + void addULEB128(uint64_t Value); + + /// \brief Encodes and adds \param Value to the hash as a SLEB128. + void addSLEB128(int64_t Value); + + /// \brief Adds \param Str to the hash and includes a NULL byte. + void addString(StringRef Str); + + /// \brief Collects the attributes of DIE \param Die into the \param Attrs + /// structure. + void collectAttributes(const DIE &Die, DIEAttrs &Attrs); + + /// \brief Hashes the attributes in \param Attrs in order. + void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag); + + /// \brief Hashes an individual attribute. + void hashAttribute(AttrEntry Attr, dwarf::Tag Tag); + + /// \brief Hashes an attribute that refers to another DIE. 
+ void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, + const DIE &Entry); + + /// \brief Hashes a reference to a named type in such a way that is + /// independent of whether that type is described by a declaration or a + /// definition. + void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry, + StringRef Name); + + /// \brief Hashes a reference to a previously referenced type DIE. + void hashRepeatedTypeReference(dwarf::Attribute Attribute, unsigned DieNumber); + + void hashNestedType(const DIE &Die, StringRef Name); + +private: + MD5 Hash; + DenseMap<const DIE *, unsigned> Numbering; +}; +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index f58ec9b..689aeda 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -24,27 +24,14 @@ using namespace llvm; -const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) { - switch (AT) { - case eAtomTypeNULL: return "eAtomTypeNULL"; - case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset"; - case eAtomTypeCUOffset: return "eAtomTypeCUOffset"; - case eAtomTypeTag: return "eAtomTypeTag"; - case eAtomTypeNameFlags: return "eAtomTypeNameFlags"; - case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags"; - } - llvm_unreachable("invalid AtomType!"); -} - // The length of the header data is always going to be 4 + 4 + 4*NumAtoms. -DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) : - Header(8 + (atomList.size() * 4)), - HeaderData(atomList), - Entries(Allocator) { } +DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) + : Header(8 + (atomList.size() * 4)), HeaderData(atomList), + Entries(Allocator) {} -DwarfAccelTable::~DwarfAccelTable() { } +DwarfAccelTable::~DwarfAccelTable() {} -void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) { +void DwarfAccelTable::AddName(StringRef Name, DIE *die, char Flags) { assert(Data.empty() && "Already finalized!"); // If the string is in the list already then add this die to the list // otherwise add a new one. @@ -59,13 +46,16 @@ void DwarfAccelTable::ComputeBucketCount(void) { uniques[i] = Data[i]->HashValue; array_pod_sort(uniques.begin(), uniques.end()); std::vector<uint32_t>::iterator p = - std::unique(uniques.begin(), uniques.end()); + std::unique(uniques.begin(), uniques.end()); uint32_t num = std::distance(uniques.begin(), p); // Then compute the bucket size, minimum of 1 bucket. - if (num > 1024) Header.bucket_count = num/4; - if (num > 16) Header.bucket_count = num/2; - else Header.bucket_count = num > 0 ? num : 1; + if (num > 1024) + Header.bucket_count = num / 4; + if (num > 16) + Header.bucket_count = num / 2; + else + Header.bucket_count = num > 0 ? num : 1; Header.hashes_count = num; } @@ -76,15 +66,15 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A, return A->Die->getOffset() < B->Die->getOffset(); } -void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { +void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { // Create the individual hash data outputs. - for (StringMap<DataArray>::iterator - EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end(); + EI != EE; ++EI) { // Unique the entries. 
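// A standalone sketch of the tiering in ComputeBucketCount above. As
// committed, the num > 1024 result is immediately overwritten by the
// unconditional num > 16 test; a chained form, presumably what was
// intended, would be:
#include <cstdint>

static uint32_t bucketCountFor(uint32_t NumUniqueHashes) {
  if (NumUniqueHashes > 1024)
    return NumUniqueHashes / 4;
  if (NumUniqueHashes > 16)
    return NumUniqueHashes / 2;
  return NumUniqueHashes > 0 ? NumUniqueHashes : 1;
}
// The sort-and-unique pass for the entries follows: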
std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs); EI->second.erase(std::unique(EI->second.begin(), EI->second.end()), - EI->second.end()); + EI->second.end()); HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second); Data.push_back(Entry); @@ -126,7 +116,7 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { Asm->EmitInt32(HeaderData.Atoms.size()); for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { Atom A = HeaderData.Atoms[i]; - Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type)); + Asm->OutStreamer.AddComment(dwarf::AtomTypeString(A.type)); Asm->EmitInt16(A.type); Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form)); Asm->EmitInt16(A.form); @@ -152,7 +142,8 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) { + HE = Buckets[i].end(); + HI != HE; ++HI) { Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); Asm->EmitInt32((*HI)->HashValue); } @@ -166,13 +157,13 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) { + HE = Buckets[i].end(); + HI != HE; ++HI) { Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); MCContext &Context = Asm->OutStreamer.getContext(); - const MCExpr *Sub = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), - MCSymbolRefExpr::Create(SecBegin, Context), - Context); + const MCExpr *Sub = MCBinaryExpr::CreateSub( + MCSymbolRefExpr::Create((*HI)->Sym, Context), + MCSymbolRefExpr::Create(SecBegin, Context), Context); Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t)); } } @@ -185,7 +176,8 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) { + HE = Buckets[i].end(); + HI != HE; ++HI) { // Remember to emit the label for our offset. Asm->OutStreamer.EmitLabel((*HI)->Sym); Asm->OutStreamer.AddComment((*HI)->Str); @@ -193,8 +185,9 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { D->getStringPoolSym()); Asm->OutStreamer.AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.size()); - for (ArrayRef<HashDataContents*>::const_iterator - DI = (*HI)->Data.begin(), DE = (*HI)->Data.end(); + for (ArrayRef<HashDataContents *>::const_iterator + DI = (*HI)->Data.begin(), + DE = (*HI)->Data.end(); DI != DE; ++DI) { // Emit the DIE offset Asm->EmitInt32((*DI)->Die->getOffset()); @@ -214,8 +207,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { } // Emit the entire data structure to the output file. -void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, - DwarfUnits *D) { +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfUnits *D) { // Emit the header. 
EmitHeader(Asm); @@ -239,11 +231,12 @@ void DwarfAccelTable::print(raw_ostream &O) { HeaderData.print(O); O << "Entries: \n"; - for (StringMap<DataArray>::const_iterator - EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + for (StringMap<DataArray>::const_iterator EI = Entries.begin(), + EE = Entries.end(); + EI != EE; ++EI) { O << "Name: " << EI->getKeyData() << "\n"; for (DataArray::const_iterator DI = EI->second.begin(), - DE = EI->second.end(); + DE = EI->second.end(); DI != DE; ++DI) (*DI)->print(O); } @@ -251,14 +244,14 @@ void DwarfAccelTable::print(raw_ostream &O) { O << "Buckets and Hashes: \n"; for (size_t i = 0, e = Buckets.size(); i < e; ++i) for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) + HE = Buckets[i].end(); + HI != HE; ++HI) (*HI)->print(O); O << "Data: \n"; - for (std::vector<HashData*>::const_iterator - DI = Data.begin(), DE = Data.end(); DI != DE; ++DI) - (*DI)->print(O); - - + for (std::vector<HashData *>::const_iterator DI = Data.begin(), + DE = Data.end(); + DI != DE; ++DI) + (*DI)->print(O); } #endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 9915bca..7627313 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -67,11 +67,7 @@ class DwarfUnits; class DwarfAccelTable { - enum HashFunctionType { - eHashFunctionDJB = 0u - }; - - static uint32_t HashDJB (StringRef Str) { + static uint32_t HashDJB(StringRef Str) { uint32_t h = 5381; for (unsigned i = 0, e = Str.size(); i != e; ++i) h = ((h << 5) + h) + Str[i]; @@ -80,25 +76,25 @@ class DwarfAccelTable { // Helper function to compute the number of buckets needed based on // the number of unique hashes. - void ComputeBucketCount (void); + void ComputeBucketCount(void); struct TableHeader { - uint32_t magic; // 'HASH' magic value to allow endian detection - uint16_t version; // Version number. - uint16_t hash_function; // The hash function enumeration that was used. - uint32_t bucket_count; // The number of buckets in this hash table. - uint32_t hashes_count; // The total number of unique hash values - // and hash data offsets in this table. - uint32_t header_data_len; // The bytes to skip to get to the hash - // indexes (buckets) for correct alignment. + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number. + uint16_t hash_function; // The hash function enumeration that was used. + uint32_t bucket_count; // The number of buckets in this hash table. + uint32_t hashes_count; // The total number of unique hash values + // and hash data offsets in this table. + uint32_t header_data_len; // The bytes to skip to get to the hash + // indexes (buckets) for correct alignment. // Also written to disk is the implementation specific header data. 
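The TableHeader fields above map one-to-one onto the fixed on-disk header of the Apple-style accelerator table, and header_data_len (8 + 4 * NumAtoms in the constructor) covers die_offset_base, the atom count, and two uint16 fields per atom. A byte-level writer for just the fixed part might look like the sketch below; field order follows the struct above, but the real emitter goes through MCStreamer in target byte order, so little-endian here is an assumption for concreteness.

// accel_header_sketch.cpp -- illustrative only, not part of the patch.
#include <cstdint>
#include <vector>

struct AccelTableHeader {
  uint32_t magic;           // 'HASH' == 0x48415348, allows endian detection
  uint16_t version;         // 1
  uint16_t hash_function;   // DW_hash_function_djb == 0
  uint32_t bucket_count;    // number of buckets
  uint32_t hashes_count;    // number of unique hashes / hash-data offsets
  uint32_t header_data_len; // bytes to skip to reach the bucket array
};

static void put16(std::vector<uint8_t> &Out, uint16_t V) {
  Out.push_back(uint8_t(V));
  Out.push_back(uint8_t(V >> 8));
}
static void put32(std::vector<uint8_t> &Out, uint32_t V) {
  put16(Out, uint16_t(V));
  put16(Out, uint16_t(V >> 16));
}

// Serialize the fixed header, little-endian, in the struct's field order.
static std::vector<uint8_t> writeHeader(const AccelTableHeader &H) {
  std::vector<uint8_t> Out;
  put32(Out, H.magic);
  put16(Out, H.version);
  put16(Out, H.hash_function);
  put32(Out, H.bucket_count);
  put32(Out, H.hashes_count);
  put32(Out, H.header_data_len);
  return Out;
}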
static const uint32_t MagicHash = 0x48415348; - TableHeader (uint32_t data_len) : - magic (MagicHash), version (1), hash_function (eHashFunctionDJB), - bucket_count (0), hashes_count (0), header_data_len (data_len) - {} + TableHeader(uint32_t data_len) + : magic(MagicHash), version(1), + hash_function(dwarf::DW_hash_function_djb), bucket_count(0), + hashes_count(0), header_data_len(data_len) {} #ifndef NDEBUG void print(raw_ostream &O) { @@ -124,62 +120,38 @@ public: // uint32_t die_offset_base // uint32_t atom_count // atom_count Atoms - enum AtomType { - eAtomTypeNULL = 0u, - eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding - eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that - // contains the item in question - eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as - // DW_FORM_data1 (if no tags exceed 255) or - // DW_FORM_data2. - eAtomTypeNameFlags = 4u, // Flags from enum NameFlags - eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags - }; - - enum TypeFlags { - eTypeFlagClassMask = 0x0000000fu, - - // Always set for C++, only set for ObjC if this is the - // @implementation for a class. - eTypeFlagClassIsImplementation = ( 1u << 1 ) - }; // Make these public so that they can be used as a general interface to // the class. struct Atom { - AtomType type; // enum AtomType + uint16_t type; // enum AtomType uint16_t form; // DWARF DW_FORM_ defines - Atom(AtomType type, uint16_t form) : type(type), form(form) {} - static const char * AtomTypeString(enum AtomType); + Atom(uint16_t type, uint16_t form) : type(type), form(form) {} #ifndef NDEBUG void print(raw_ostream &O) { - O << "Type: " << AtomTypeString(type) << "\n" + O << "Type: " << dwarf::AtomTypeString(type) << "\n" << "Form: " << dwarf::FormEncodingString(form) << "\n"; } - void dump() { - print(dbgs()); - } + void dump() { print(dbgs()); } #endif }; - private: +private: struct TableHeaderData { uint32_t die_offset_base; SmallVector<Atom, 1> Atoms; TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0) - : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { } + : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {} #ifndef NDEBUG - void print (raw_ostream &O) { + void print(raw_ostream &O) { O << "die_offset_base: " << die_offset_base << "\n"; for (size_t i = 0; i < Atoms.size(); i++) Atoms[i].print(O); } - void dump() { - print(dbgs()); - } + void dump() { print(dbgs()); } #endif }; @@ -193,37 +165,38 @@ public: // HashData[hash_data_count] public: struct HashDataContents { - DIE *Die; // Offsets + DIE *Die; // Offsets char Flags; // Specific flags to output - HashDataContents(DIE *D, char Flags) : - Die(D), - Flags(Flags) { } - #ifndef NDEBUG + HashDataContents(DIE *D, char Flags) : Die(D), Flags(Flags) {} +#ifndef NDEBUG void print(raw_ostream &O) const { O << " Offset: " << Die->getOffset() << "\n"; O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n"; O << " Flags: " << Flags << "\n"; } - #endif +#endif }; + private: struct HashData { StringRef Str; uint32_t HashValue; MCSymbol *Sym; - ArrayRef<HashDataContents*> Data; // offsets - HashData(StringRef S, ArrayRef<HashDataContents*> Data) - : Str(S), Data(Data) { + ArrayRef<HashDataContents *> Data; // offsets + HashData(StringRef S, ArrayRef<HashDataContents *> Data) + : Str(S), Data(Data) { HashValue = DwarfAccelTable::HashDJB(S); } - #ifndef NDEBUG +#ifndef NDEBUG void print(raw_ostream &O) { O << "Name: " << Str << "\n"; O << " Hash Value: " << format("0x%x", HashValue) << "\n"; - O << " 
Symbol: " ; - if (Sym) Sym->print(O); - else O << "<none>"; + O << " Symbol: "; + if (Sym) + Sym->print(O); + else + O << "<none>"; O << "\n"; for (size_t i = 0; i < Data.size(); i++) { O << " Offset: " << Data[i]->Die->getOffset() << "\n"; @@ -231,14 +204,12 @@ private: O << " Flags: " << Data[i]->Flags << "\n"; } } - void dump() { - print(dbgs()); - } - #endif + void dump() { print(dbgs()); } +#endif }; - DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; - void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; + DwarfAccelTable(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; + void operator=(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; // Internal Functions void EmitHeader(AsmPrinter *); @@ -253,31 +224,30 @@ private: // Output Variables TableHeader Header; TableHeaderData HeaderData; - std::vector<HashData*> Data; + std::vector<HashData *> Data; // String Data - typedef std::vector<HashDataContents*> DataArray; - typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries; + typedef std::vector<HashDataContents *> DataArray; + typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries; StringEntries Entries; // Buckets/Hashes/Offsets - typedef std::vector<HashData*> HashList; + typedef std::vector<HashData *> HashList; typedef std::vector<HashList> BucketList; BucketList Buckets; HashList Hashes; // Public Implementation - public: +public: DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); ~DwarfAccelTable(); - void AddName(StringRef, DIE*, char = 0); - void FinalizeTable(AsmPrinter *, const char *); + void AddName(StringRef, DIE *, char = 0); + void FinalizeTable(AsmPrinter *, StringRef); void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *); #ifndef NDEBUG void print(raw_ostream &O); void dump() { print(dbgs()); } #endif }; - } #endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index fec5ced..8918f3d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -68,7 +68,7 @@ void DwarfCFIException::EndModule() { for (size_t i = 0, e = Personalities.size(); i != e; ++i) { if (!Personalities[i]) continue; - MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]); + MCSymbol *Sym = Asm->getSymbol(Personalities[i]); TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); AtLeastOne = true; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 89abcff..97ef687 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -22,21 +22,23 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; /// CompileUnit - Compile unit constructor. 
-CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, - DwarfDebug *DW, DwarfUnits *DWU) - : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU), - IndexTyDie(0), DebugInfoOffset(0) { +CompileUnit::CompileUnit(unsigned UID, DIE *D, DICompileUnit Node, + AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU) + : UniqueID(UID), Node(Node), CUDie(D), Asm(A), DD(DW), DU(DWU), + IndexTyDie(0), DebugInfoOffset(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); + insertDIE(Node, D); } /// ~CompileUnit - Destructor for compile unit. @@ -55,7 +57,7 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { /// getDefaultLowerBound - Return the default lower bound for an array. If the /// DWARF version doesn't handle the language, return -1. int64_t CompileUnit::getDefaultLowerBound() const { - switch (Language) { + switch (getLanguage()) { default: break; @@ -96,32 +98,71 @@ int64_t CompileUnit::getDefaultLowerBound() const { return -1; } +/// Check whether the DIE for this MDNode can be shared across CUs. +static bool isShareableAcrossCUs(DIDescriptor D) { + // When the MDNode can be part of the type system, the DIE can be + // shared across CUs. + return D.isType() || + (D.isSubprogram() && !DISubprogram(D).isDefinition()); +} + +/// getDIE - Returns the debug information entry map slot for the +/// specified debug variable. We delegate the request to DwarfDebug +/// when the DIE for this MDNode can be shared across CUs. The mappings +/// will be kept in DwarfDebug for shareable DIEs. +DIE *CompileUnit::getDIE(DIDescriptor D) const { + if (isShareableAcrossCUs(D)) + return DD->getDIE(D); + return MDNodeToDieMap.lookup(D); +} + +/// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug +/// when the DIE for this MDNode can be shared across CUs. The mappings +/// will be kept in DwarfDebug for shareable DIEs. +void CompileUnit::insertDIE(DIDescriptor Desc, DIE *D) { + if (isShareableAcrossCUs(Desc)) { + DD->insertDIE(Desc, D); + return; + } + MDNodeToDieMap.insert(std::make_pair(Desc, D)); +} + /// addFlag - Add a flag that is true. -void CompileUnit::addFlag(DIE *Die, unsigned Attribute) { - if (!DD->useDarwinGDBCompat()) - Die->addValue(Attribute, dwarf::DW_FORM_flag_present, - DIEIntegerOne); +void CompileUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) { + if (DD->getDwarfVersion() >= 4) + Die->addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne); else - addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1); + Die->addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne); } /// addUInt - Add an unsigned integer attribute data and value. /// -void CompileUnit::addUInt(DIE *Die, unsigned Attribute, - unsigned Form, uint64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = Integer == 1 ? - DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); +void CompileUnit::addUInt(DIE *Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, uint64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(false, Integer); + DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator) + DIEInteger(Integer); + Die->addValue(Attribute, *Form, Value); +} + +void CompileUnit::addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer) { + addUInt(Block, (dwarf::Attribute)0, Form, Integer); } /// addSInt - Add an signed integer attribute data and value. 
/// -void CompileUnit::addSInt(DIE *Die, unsigned Attribute, - unsigned Form, int64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(true, Integer); +void CompileUnit::addSInt(DIE *Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, int64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(true, Integer); DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); + Die->addValue(Attribute, *Form, Value); +} + +void CompileUnit::addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, + int64_t Integer) { + addSInt(Die, (dwarf::Attribute)0, Form, Integer); } /// addString - Add a string attribute data and value. We always emit a @@ -129,27 +170,31 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, /// more predictable sizes. In the case of split dwarf we emit an index /// into another table which gets us the static offset into the string /// table. -void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { +void CompileUnit::addString(DIE *Die, dwarf::Attribute Attribute, + StringRef String) { + DIEValue *Value; + dwarf::Form Form; if (!DD->useSplitDwarf()) { MCSymbol *Symb = DU->getStringPoolEntry(String); - DIEValue *Value; if (Asm->needsRelocationsForDwarfStringPool()) Value = new (DIEValueAllocator) DIELabel(Symb); else { MCSymbol *StringPool = DU->getStringPoolSym(); Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); } - Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); + Form = dwarf::DW_FORM_strp; } else { unsigned idx = DU->getStringPoolIndex(String); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Value); + Value = new (DIEValueAllocator) DIEInteger(idx); + Form = dwarf::DW_FORM_GNU_str_index; } + DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); + Die->addValue(Attribute, Form, Str); } /// addLocalString - Add a string attribute data and value. This is guaranteed /// to be in the local string pool instead of indirected. -void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, +void CompileUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute, StringRef String) { MCSymbol *Symb = DU->getStringPoolEntry(String); DIEValue *Value; @@ -162,19 +207,54 @@ void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); } +/// addExpr - Add a Dwarf expression attribute data and value. +/// +void CompileUnit::addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr) { + DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); + Die->addValue((dwarf::Attribute)0, Form, Value); +} + /// addLabel - Add a Dwarf label attribute data and value. /// -void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label) { +void CompileUnit::addLabel(DIE *Die, dwarf::Attribute Attribute, + dwarf::Form Form, const MCSymbol *Label) { DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); Die->addValue(Attribute, Form, Value); } +void CompileUnit::addLabel(DIEBlock *Die, dwarf::Form Form, + const MCSymbol *Label) { + addLabel(Die, (dwarf::Attribute)0, Form, Label); +} + +/// addSectionLabel - Add a Dwarf section label attribute data and value. 
+/// +void CompileUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { + if (DD->getDwarfVersion() >= 4) + addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label); + else + addLabel(Die, Attribute, dwarf::DW_FORM_data4, Label); +} + +/// addSectionOffset - Add an offset into a section attribute data and value. +/// +void CompileUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute, + uint64_t Integer) { + if (DD->getDwarfVersion() >= 4) + addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer); + else + addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); +} + /// addLabelAddress - Add a dwarf label attribute data and value using /// DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, +void CompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label) { + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + if (!DD->useSplitDwarf()) { if (Label != NULL) { DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); @@ -193,37 +273,62 @@ void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) { - +void CompileUnit::addOpAddress(DIEBlock *Die, const MCSymbol *Sym) { + DD->addArangeLabel(SymbolCU(this, Sym)); if (!DD->useSplitDwarf()) { - addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Die, 0, dwarf::DW_FORM_udata, Sym); + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Die, dwarf::DW_FORM_udata, Sym); } else { - unsigned idx = DU->getAddrPoolIndex(Sym); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); - Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value); + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); + addUInt(Die, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym)); } } -/// addDelta - Add a label delta attribute data and value. +/// addSectionDelta - Add a section label delta attribute data and value. /// -void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo) { +void CompileUnit::addSectionDelta(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die->addValue(Attribute, Form, Value); + if (DD->getDwarfVersion() >= 4) + Die->addValue(Attribute, dwarf::DW_FORM_sec_offset, Value); + else + Die->addValue(Attribute, dwarf::DW_FORM_data4, Value); } /// addDIEEntry - Add a DIE attribute data and value. /// -void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, +void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry) { - Die->addValue(Attribute, Form, createDIEEntry(Entry)); + addDIEEntry(Die, Attribute, createDIEEntry(Entry)); +} + +void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, + DIEEntry *Entry) { + const DIE *DieCU = Die->getCompileUnitOrNull(); + const DIE *EntryCU = Entry->getEntry()->getCompileUnitOrNull(); + if (!DieCU) + // We assume that Die belongs to this CU, if it is not linked to any CU yet. + DieCU = getCUDie(); + if (!EntryCU) + EntryCU = getCUDie(); + Die->addValue(Attribute, EntryCU == DieCU ? 
dwarf::DW_FORM_ref4 + : dwarf::DW_FORM_ref_addr, + Entry); +} + +/// Create a DIE with the given Tag, add the DIE to its parent, and +/// call insertDIE if MD is not null. +DIE *CompileUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) { + DIE *Die = new DIE(Tag); + Parent.addChild(Die); + if (N) + insertDIE(N, Die); + return Die; } /// addBlock - Add block data. /// -void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, +void CompileUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block) { Block->ComputeSize(Asm); DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. @@ -234,42 +339,42 @@ void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, /// entry. void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { // Verify variable. - if (!V.Verify()) + if (!V.isVariable()) return; unsigned Line = V.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(), - V.getContext().getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { // Verify global variable. - if (!G.Verify()) + if (!G.isGlobalVariable()) return; unsigned Line = G.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { // Verify subprogram. - if (!SP.Verify()) + if (!SP.isSubprogram()) return; // If the line number is 0, don't add it. @@ -277,35 +382,35 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), - SP.getDirectory(), getUniqueID()); + unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), SP.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { // Verify type. 
- if (!Ty.Verify()) + if (!Ty.isType()) return; unsigned Line = Ty.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), - Ty.getDirectory(), getUniqueID()); + unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), Ty.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { // Verify type. - if (!Ty.Verify()) + if (!Ty.isObjCProperty()) return; unsigned Line = Ty.getLineNumber(); @@ -315,8 +420,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { unsigned FileID = DD->getOrCreateSourceID(File.getFilename(), File.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information @@ -331,68 +436,73 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { return; StringRef FN = NS.getFilename(); - unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(FN, NS.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. -void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, +void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, MachineLocation Location) { - if (DV->variableHasComplexAddress()) + if (DV.variableHasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV->isBlockByrefVariable()) + else if (DV.isBlockByrefVariable()) addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); else - addAddress(Die, dwarf::DW_AT_location, Location); + addAddress(Die, dwarf::DW_AT_location, Location, + DV.getVariable().isIndirect()); } /// addRegisterOp - Add register operand. -void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) { +void CompileUnit::addRegisterOp(DIEBlock *TheDie, unsigned Reg) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); if (DWReg < 32) - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); else { - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); } } /// addRegisterOffset - Add register offset. 
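addRegisterOp and addRegisterOffset (above and just below) choose between the short one-byte register opcodes and the extended ULEB128-encoded ones. The sketch below spells out that encoding decision with the DWARF v4 opcode values written in; the byte-vector output and function names are illustrative, not part of the patch.

// dwarf_loc_sketch.cpp -- illustrative only, not part of the patch.
#include <cstdint>
#include <vector>

// DWARF expression opcodes used here (DWARF v4, section 7.7.1).
enum : uint8_t {
  DW_OP_reg0  = 0x50, // DW_OP_reg0 .. DW_OP_reg31 name registers directly
  DW_OP_regx  = 0x90, // register number follows as ULEB128
  DW_OP_breg0 = 0x70, // base register + SLEB128 offset
  DW_OP_bregx = 0x92, // ULEB128 register + SLEB128 offset
  DW_OP_fbreg = 0x91  // frame-base register + SLEB128 offset
};

static void emitULEB(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t B = V & 0x7f;
    V >>= 7;
    if (V)
      B |= 0x80;
    Out.push_back(B);
  } while (V);
}

static void emitSLEB(std::vector<uint8_t> &Out, int64_t V) {
  bool More;
  do {
    uint8_t B = V & 0x7f;
    V >>= 7; // arithmetic shift keeps the sign
    More = !((V == 0 && !(B & 0x40)) || (V == -1 && (B & 0x40)));
    if (More)
      B |= 0x80;
    Out.push_back(B);
  } while (More);
}

// "Variable lives in register DWReg": DW_OP_regN if it fits, else DW_OP_regx.
static void emitRegisterOp(std::vector<uint8_t> &Out, unsigned DWReg) {
  if (DWReg < 32) {
    Out.push_back(uint8_t(DW_OP_reg0 + DWReg));
  } else {
    Out.push_back(DW_OP_regx);
    emitULEB(Out, DWReg);
  }
}

// "Variable lives at [reg + offset]": fbreg / bregN / bregx, then the
// SLEB128 offset, matching the DW_FORM_sdata emission in the hunk.
static void emitRegisterOffset(std::vector<uint8_t> &Out, unsigned DWReg,
                               bool IsFrameReg, int64_t Offset) {
  if (IsFrameReg)
    Out.push_back(DW_OP_fbreg);
  else if (DWReg < 32)
    Out.push_back(uint8_t(DW_OP_breg0 + DWReg));
  else {
    Out.push_back(DW_OP_bregx);
    emitULEB(Out, DWReg);
  }
  emitSLEB(Out, Offset);
}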
-void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg, +void CompileUnit::addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); if (Reg == TRI->getFrameRegister(*Asm->MF)) // If variable offset is based in frame register then use fbreg. - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); else if (DWReg < 32) - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); else { - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); } - addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset); + addSInt(TheDie, dwarf::DW_FORM_sdata, Offset); } /// addAddress - Add an address attribute to a die based on the location /// provided. -void CompileUnit::addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location) { +void CompileUnit::addAddress(DIE *Die, dwarf::Attribute Attribute, + const MachineLocation &Location, bool Indirect) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - if (Location.isReg()) + if (Location.isReg() && !Indirect) addRegisterOp(Block, Location.getReg()); - else + else { addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + if (Indirect && !Location.isReg()) { + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } + } // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /// addComplexAddress - Start with the address based on the location provided, @@ -400,37 +510,37 @@ void CompileUnit::addAddress(DIE *Die, unsigned Attribute, /// given the extra address information encoded in the DIVariable, starting from /// the starting location. Add the DWARF information to the die. /// -void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, +void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, + dwarf::Attribute Attribute, const MachineLocation &Location) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned N = DV->getNumAddrElements(); + unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (Location.isReg()) { - if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) { + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. 
- addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1)); + addRegisterOffset(Block, Location.getReg(), DV.getAddrElement(1)); i = 2; } else addRegisterOp(Block, Location.getReg()); - } - else + } else addRegisterOffset(Block, Location.getReg(), Location.getOffset()); - for (;i < N; ++i) { - uint64_t Element = DV->getAddrElement(i); + for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); if (Element == DIBuilder::OpPlus) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { if (!Location.isReg()) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else llvm_unreachable("unknown DIBuilder Opcode"); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else + llvm_unreachable("unknown DIBuilder Opcode"); } // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -493,45 +603,42 @@ void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, /// starting location. Add the DWARF information to the die. For /// more information, read large comment just above here. /// -void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, +void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, + dwarf::Attribute Attribute, const MachineLocation &Location) { - DIType Ty = DV->getType(); + DIType Ty = DV.getType(); DIType TmpTy = Ty; - unsigned Tag = Ty.getTag(); + uint16_t Tag = Ty.getTag(); bool isPointer = false; - StringRef varName = DV->getName(); + StringRef varName = DV.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - TmpTy = DTy.getTypeDerivedFrom(); + DIDerivedType DTy(Ty); + TmpTy = resolve(DTy.getTypeDerivedFrom()); isPointer = true; } - DICompositeType blockStruct = DICompositeType(TmpTy); + DICompositeType blockStruct(TmpTy); // Find the __forwarding field and the variable field in the __Block_byref // struct. DIArray Fields = blockStruct.getTypeArray(); - DIDescriptor varField = DIDescriptor(); - DIDescriptor forwardingField = DIDescriptor(); + DIDerivedType varField; + DIDerivedType forwardingField; for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDescriptor Element = Fields.getElement(i); - DIDerivedType DT = DIDerivedType(Element); + DIDerivedType DT(Fields.getElement(i)); StringRef fieldName = DT.getName(); if (fieldName == "__forwarding") - forwardingField = Element; + forwardingField = DT; else if (fieldName == varName) - varField = Element; + varField = DT; } // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = - DIDerivedType(forwardingField).getOffsetInBits() >> 3; - unsigned varFieldOffset = - DIDerivedType(varField).getOffsetInBits() >> 3; + unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3; + unsigned varFieldOffset = varField.getOffsetInBits() >> 2; // Decode the original location, and use that as the start of the byref // variable's location. @@ -545,76 +652,139 @@ void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, // If we started with a pointer to the __Block_byref... 
struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). if (isPointer) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Next add the offset for the '__forwarding' field: // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, forwardingFieldOffset); } // Now dereference the __forwarding field to get to the real __Block_byref // struct: DW_OP_deref. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Now that we've got the real __Block_byref... struct, add the offset // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, varFieldOffset); } // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /// isTypeSigned - Return true if the type is signed. -static bool isTypeSigned(DIType Ty, int *SizeInBits) { +static bool isTypeSigned(DwarfDebug *DD, DIType Ty, int *SizeInBits) { if (Ty.isDerivedType()) - return isTypeSigned(DIDerivedType(Ty).getTypeDerivedFrom(), SizeInBits); + return isTypeSigned(DD, DD->resolve(DIDerivedType(Ty).getTypeDerivedFrom()), + SizeInBits); if (Ty.isBasicType()) - if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed - || DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { + if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed || + DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { *SizeInBits = Ty.getSizeInBits(); return true; } return false; } +/// Return true if type encoding is unsigned. +static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { + DIDerivedType DTy(Ty); + if (DTy.isDerivedType()) + return isUnsignedDIType(DD, DD->resolve(DTy.getTypeDerivedFrom())); + + DIBasicType BTy(Ty); + if (BTy.isBasicType()) { + unsigned Encoding = BTy.getEncoding(); + if (Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_boolean) + return true; + } + return false; +} + +/// If this type is derived from a base type then return base type size. +static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { + unsigned Tag = Ty.getTag(); + + if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && + Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type) + return Ty.getSizeInBits(); + + DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); + + // If this type is not derived from any type then take conservative approach. + if (!BaseType.isValid()) + return Ty.getSizeInBits(); + + // If this is a derived type, go ahead and get the base type, unless it's a + // reference then it's just the size of the field. Pointer types have no need + // of this since they're a different type of qualification on the type. 
+ if (BaseType.getTag() == dwarf::DW_TAG_reference_type || + BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty.getSizeInBits(); + + if (BaseType.isDerivedType()) + return getBaseTypeSize(DD, DIDerivedType(BaseType)); + + return BaseType.getSizeInBits(); +} + /// addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, +void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty) { + // FIXME: This is a bit conservative/simple - it emits negative values at + // their maximum bit width which is a bit unfortunate (& doesn't prefer + // udata/sdata over dataN as suggested by the DWARF spec) assert(MO.isImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); int SizeInBits = -1; - bool SignedConstant = isTypeSigned(Ty, &SizeInBits); - unsigned Form = SignedConstant ? dwarf::DW_FORM_sdata : dwarf::DW_FORM_udata; - switch (SizeInBits) { - case 8: Form = dwarf::DW_FORM_data1; break; - case 16: Form = dwarf::DW_FORM_data2; break; - case 32: Form = dwarf::DW_FORM_data4; break; - case 64: Form = dwarf::DW_FORM_data8; break; - default: break; + bool SignedConstant = isTypeSigned(DD, Ty, &SizeInBits); + dwarf::Form Form; + + // If we're a signed constant definitely use sdata. + if (SignedConstant) { + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, MO.getImm()); + return; } - SignedConstant ? addSInt(Block, 0, Form, MO.getImm()) - : addUInt(Block, 0, Form, MO.getImm()); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; + // Else use data for now unless it's larger than we can deal with. + switch (SizeInBits) { + case 8: + Form = dwarf::DW_FORM_data1; + break; + case 16: + Form = dwarf::DW_FORM_data2; + break; + case 32: + Form = dwarf::DW_FORM_data4; + break; + case 64: + Form = dwarf::DW_FORM_data8; + break; + default: + Form = dwarf::DW_FORM_udata; + addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); + return; + } + addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); } /// addConstantFPValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isFPImm() && "Invalid machine operand!"); +void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { + assert(MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); // Get the raw data form of the floating point. const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); + const char *FltPtr = (const char *)FltVal.getRawData(); int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. bool LittleEndian = Asm->getDataLayout().isLittleEndian(); @@ -624,43 +794,56 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { // Output the constant to DWARF one byte at a time. for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); + addUInt(Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; + addBlock(Die, dwarf::DW_AT_const_value, Block); } /// addConstantFPValue - Add constant value entry in variable DIE. 
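The rewritten addConstantValue above replaces the old block-based emission with direct attribute forms: sdata for anything signed, a fixed-size dataN form when the bit width matches exactly, and udata otherwise. The decision table, isolated as a sketch (form constants per DWARF v4, section 7.5.4; the helper name is invented):

// const_form_sketch.cpp -- illustrative only, not part of the patch.
#include <cstdint>

enum DwarfForm : uint16_t {
  DW_FORM_data2 = 0x05, DW_FORM_data4 = 0x06, DW_FORM_data8 = 0x07,
  DW_FORM_data1 = 0x0b, DW_FORM_sdata = 0x0d, DW_FORM_udata = 0x0f
};

// Signed values always take SLEB128 (sdata); unsigned values take a
// fixed-size form when the type's bit width matches one, and ULEB128
// (udata) as the fallback.
static DwarfForm chooseConstForm(bool IsSigned, int SizeInBits) {
  if (IsSigned)
    return DW_FORM_sdata;
  switch (SizeInBits) {
  case 8:  return DW_FORM_data1;
  case 16: return DW_FORM_data2;
  case 32: return DW_FORM_data4;
  case 64: return DW_FORM_data8;
  default: return DW_FORM_udata;
  }
}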
-bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { - return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false); +void CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { + // Pass this down to addConstantValue as an unsigned bag of bits. + addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); } /// addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, +void CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned) { - return addConstantValue(Die, CI->getValue(), Unsigned); + addConstantValue(Die, CI->getValue(), Unsigned); } // addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, - bool Unsigned) { +void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { - unsigned form = 0; + // If we're a signed constant definitely use sdata. + if (!Unsigned) { + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + Val.getSExtValue()); + return; + } + + // Else use data for now unless it's larger than we can deal with. + dwarf::Form Form; switch (CIBitWidth) { - case 8: form = dwarf::DW_FORM_data1; break; - case 16: form = dwarf::DW_FORM_data2; break; - case 32: form = dwarf::DW_FORM_data4; break; - case 64: form = dwarf::DW_FORM_data8; break; + case 8: + Form = dwarf::DW_FORM_data1; + break; + case 16: + Form = dwarf::DW_FORM_data2; + break; + case 32: + Form = dwarf::DW_FORM_data4; + break; + case 64: + Form = dwarf::DW_FORM_data8; + break; default: - form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata; + addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + Val.getZExtValue()); + return; } - if (Unsigned) - addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue()); - else - addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue()); - return true; + addUInt(Die, dwarf::DW_AT_const_value, Form, Val.getZExtValue()); + return; } DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); @@ -678,11 +861,10 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, c = Ptr64[i / 8] >> (8 * (i & 7)); else c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)); - addUInt(Block, 0, dwarf::DW_FORM_data1, c); + addUInt(Block, dwarf::DW_FORM_data1, c); } - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; + addBlock(Die, dwarf::DW_AT_const_value, Block); } /// addTemplateParams - Add template parameters into buffer. @@ -691,47 +873,48 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { DIDescriptor Element = TParams.getElement(i); if (Element.isTemplateTypeParameter()) - Buffer.addChild(getOrCreateTemplateTypeParameterDIE( - DITemplateTypeParameter(Element))); + constructTemplateTypeParameterDIE(Buffer, + DITemplateTypeParameter(Element)); else if (Element.isTemplateValueParameter()) - Buffer.addChild(getOrCreateTemplateValueParameterDIE( - DITemplateValueParameter(Element))); + constructTemplateValueParameterDIE(Buffer, + DITemplateValueParameter(Element)); } } /// getOrCreateContextDIE - Get context owner's DIE. 
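getOrCreateContextDIE and getOrCreateTypeDIE below follow a deliberate ordering: build (or find) the parent context's DIE before checking whether the requested DIE already exists, because constructing the context can itself create the requested DIE. A generic sketch of that memoized, parent-first pattern; the Node/Die types and names are invented for illustration and ownership is elided.

// get_or_create_sketch.cpp -- illustrative pattern, not part of the patch.
#include <map>
#include <string>

struct Node {          // stand-in for an MDNode-keyed descriptor
  std::string name;
  const Node *parent;  // null at the top level
};

struct Die {           // stand-in for llvm::DIE (ownership elided here)
  std::string tag;
  Die *parentDie;
};

static std::map<const Node *, Die *> DieMap; // stand-in for MDNodeToDieMap

static Die *getOrCreateDie(const Node *N, Die *Root) {
  if (!N)
    return Root;
  // 1. Construct the context *first*: doing so may recursively create
  //    the DIE for N (e.g. a member created while building its class).
  Die *Context = getOrCreateDie(N->parent, Root);
  // 2. Only now is it safe to consult the cache.
  std::map<const Node *, Die *>::iterator It = DieMap.find(N);
  if (It != DieMap.end())
    return It->second;
  // 3. Create, attach to the context, memoize.
  Die *D = new Die();
  D->tag = N->name;
  D->parentDie = Context;
  DieMap[N] = D;
  return D;
}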
-DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) { +DIE *CompileUnit::getOrCreateContextDIE(DIScope Context) { + if (!Context || Context.isFile()) + return getCUDie(); if (Context.isType()) return getOrCreateTypeDIE(DIType(Context)); - else if (Context.isNameSpace()) + if (Context.isNameSpace()) return getOrCreateNameSpace(DINameSpace(Context)); - else if (Context.isSubprogram()) + if (Context.isSubprogram()) return getOrCreateSubprogramDIE(DISubprogram(Context)); - else - return getDIE(Context); -} - -/// addToContextOwner - Add Die into the list of its context owner's children. -void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { - if (DIE *ContextDIE = getOrCreateContextDIE(Context)) - ContextDIE->addChild(Die); - else - addDie(Die); + return getDIE(Context); } /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { - DIType Ty(TyNode); - if (!Ty.Verify()) + if (!TyNode) return NULL; + + DIType Ty(TyNode); + assert(Ty.isType()); + + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(resolve(Ty.getContext())); + assert(ContextDIE); + DIE *TyDIE = getDIE(Ty); if (TyDIE) return TyDIE; // Create new type. - TyDIE = new DIE(dwarf::DW_TAG_base_type); - insertDIE(Ty, TyDIE); + TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + if (Ty.isBasicType()) constructTypeDIE(*TyDIE, DIBasicType(Ty)); else if (Ty.isCompositeType()) @@ -748,28 +931,24 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { DICompositeType CT(Ty); // A runtime language of 0 actually means C/C++ and that any // non-negative value is some version of Objective-C/C++. - IsImplementation = (CT.getRunTimeLang() == 0) || - CT.isObjcClassComplete(); + IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete(); } - unsigned Flags = IsImplementation ? - DwarfAccelTable::eTypeFlagClassIsImplementation : 0; + unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0; addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags)); } - addToContextOwner(TyDIE, Ty.getContext()); return TyDIE; } /// addType - Add a new type attribute to the specified entity. -void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { - if (!Ty.Verify()) - return; +void CompileUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) { + assert(Ty && "Trying to add a type that doesn't exist?"); // Check for pre-existence. DIEEntry *Entry = getDIEEntry(Ty); // If it exists then use the existing value. if (Entry) { - Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); + addDIEEntry(Entity, Attribute, Entry); return; } @@ -779,35 +958,112 @@ void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { // Set up proxy. Entry = createDIEEntry(Buffer); insertDIEEntry(Ty, Entry); - Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); + addDIEEntry(Entity, Attribute, Entry); // If this is a complete composite type then include it in the // list of global types. addGlobalType(Ty); } +// Accelerator table mutators - add each name along with its companion +// DIE to the proper table while ensuring that the name that we're going +// to reference is in the string table. We do this since the names we +// add may not only be identical to the names in the DIE. 
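The accelerator mutators that follow all share one shape: intern the name in the string pool, then append the DIE to that name's list, since one name can describe several DIEs. Reduced to standard containers (a sketch; the struct and member names are invented):

// accel_names_sketch.cpp -- illustrative only, not part of the patch.
#include <map>
#include <set>
#include <string>
#include <vector>

struct AccelNameMap {
  std::set<std::string> stringPool; // stand-in for DwarfUnits' string pool
  std::map<std::string, std::vector<const void *> > names;

  void add(const std::string &Name, const void *Die) {
    stringPool.insert(Name); // guarantee the name itself gets emitted
    names[Name].push_back(Die);
  }
};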
+void CompileUnit::addAccelName(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelNames[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelObjC(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelObjC[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelNamespace(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelNamespace[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { + DU->getStringPoolEntry(Name); + std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name]; + DIEs.push_back(Die); +} + +/// addGlobalName - Add a new global name to the compile unit. +void CompileUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) { + std::string FullName = getParentContextString(Context) + Name.str(); + GlobalNames[FullName] = Die; +} + /// addGlobalType - Add a new global type to the compile unit. /// void CompileUnit::addGlobalType(DIType Ty) { - DIDescriptor Context = Ty.getContext(); - if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl() - && (!Context || Context.isCompileUnit() || Context.isFile() - || Context.isNameSpace())) - if (DIEEntry *Entry = getDIEEntry(Ty)) - GlobalTypes[Ty.getName()] = Entry->getEntry(); + DIScope Context = resolve(Ty.getContext()); + if (!Ty.getName().empty() && !Ty.isForwardDecl() && + (!Context || Context.isCompileUnit() || Context.isFile() || + Context.isNameSpace())) + if (DIEEntry *Entry = getDIEEntry(Ty)) { + std::string FullName = + getParentContextString(Context) + Ty.getName().str(); + GlobalTypes[FullName] = Entry->getEntry(); + } +} + +/// getParentContextString - Walks the metadata parent chain in a language +/// specific manner (using the compile unit language) and returns +/// it as a string. This is done at the metadata level because DIEs may +/// not currently have been added to the parent context and walking the +/// DIEs looking for names is more expensive than walking the metadata. +std::string CompileUnit::getParentContextString(DIScope Context) const { + if (!Context) + return ""; + + // FIXME: Decide whether to implement this for non-C++ languages. + if (getLanguage() != dwarf::DW_LANG_C_plus_plus) + return ""; + + std::string CS; + SmallVector<DIScope, 1> Parents; + while (!Context.isCompileUnit()) { + Parents.push_back(Context); + if (Context.getContext()) + Context = resolve(Context.getContext()); + else + // Structure, etc types will have a NULL context if they're at the top + // level. + break; + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (SmallVectorImpl<DIScope>::reverse_iterator I = Parents.rbegin(), + E = Parents.rend(); + I != E; ++I) { + DIScope Ctx = *I; + StringRef Name = Ctx.getName(); + if (!Name.empty()) { + CS += Name; + CS += "::"; + } + } + return CS; } -/// addPubTypes - Add type for pubtypes section. +/// addPubTypes - Add subprogram argument types for pubtypes section. 
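getParentContextString above collects scopes innermost-first while walking up the parent chain, then reverse-iterates so the qualifier reads outermost-first. Isolated from the metadata types, the string construction is just the following (a sketch; plain strings stand in for DIScope, and the compile-unit scope is assumed already excluded):

// context_string_sketch.cpp -- illustrative only, not part of the patch.
#include <string>
#include <vector>

static std::string
parentContextString(const std::vector<std::string> &ScopesInnerFirst) {
  std::string CS;
  for (std::vector<std::string>::const_reverse_iterator
           I = ScopesInnerFirst.rbegin(),
           E = ScopesInnerFirst.rend();
       I != E; ++I) {
    if (I->empty())
      continue; // anonymous scopes contribute nothing
    CS += *I;
    CS += "::";
  }
  return CS;
}
// e.g. {"Inner", "Outer", "N"} -> "N::Outer::Inner::"; the caller then
// appends the entity's own name to form the full pubnames/pubtypes key.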
void CompileUnit::addPubTypes(DISubprogram SP) { DICompositeType SPTy = SP.getType(); - unsigned SPTag = SPTy.getTag(); + uint16_t SPTag = SPTy.getTag(); if (SPTag != dwarf::DW_TAG_subroutine_type) return; DIArray Args = SPTy.getTypeArray(); for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { DIType ATy(Args.getElement(i)); - if (!ATy.Verify()) + if (!ATy.isType()) continue; addGlobalType(ATy); } @@ -821,18 +1077,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, Name); - if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) { - Buffer.setTag(dwarf::DW_TAG_unspecified_type); - // Unspecified types has only name, nothing else. + // An unspecified type only has a name attribute. + if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) return; - } - Buffer.setTag(dwarf::DW_TAG_base_type); addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, BTy.getEncoding()); uint64_t Size = BTy.getSizeInBits() >> 3; - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); } /// constructTypeDIE - Construct derived type die from DIDerivedType. @@ -840,16 +1093,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Get core information. StringRef Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; - unsigned Tag = DTy.getTag(); - - // FIXME - Workaround for templates. - if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; - - Buffer.setTag(Tag); + uint16_t Tag = Buffer.getTag(); // Map to main type, void will not have a type. - DIType FromTy = DTy.getTypeDerivedFrom(); - addType(&Buffer, FromTy); + DIType FromTy = resolve(DTy.getTypeDerivedFrom()); + if (FromTy) + addType(&Buffer, FromTy); // Add name if not anonymous or intermediate type. if (!Name.empty()) @@ -857,97 +1106,102 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Add size if non-zero (derived types might be zero-sized.) if (Size && Tag != dwarf::DW_TAG_pointer_type) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DTy.getClassType())); + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, + getOrCreateTypeDIE(resolve(DTy.getClassType()))); // Add source line info if available and TyDesc is not a forward declaration. if (!DTy.isForwardDecl()) addSourceLine(&Buffer, DTy); } +/// Return true if the type is appropriately scoped to be contained inside +/// its own type unit. +static bool isTypeUnitScoped(DIType Ty, const DwarfDebug *DD) { + DIScope Parent = DD->resolve(Ty.getContext()); + while (Parent) { + // Don't generate a hash for anything scoped inside a function. + if (Parent.isSubprogram()) + return false; + Parent = DD->resolve(Parent.getContext()); + } + return true; +} + +/// Return true if the type should be split out into a type unit. +static bool shouldCreateTypeUnit(DICompositeType CTy, const DwarfDebug *DD) { + uint16_t Tag = CTy.getTag(); + + switch (Tag) { + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + case dwarf::DW_TAG_class_type: + // If this is a class, structure, union, or enumeration type + // that is a definition (not a declaration), and not scoped + // inside a function then separate this out as a type unit. 
+ return !CTy.isForwardDecl() && isTypeUnitScoped(CTy, DD); + default: + return false; + } +} + /// constructTypeDIE - Construct type DIE from DICompositeType. void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Get core information. StringRef Name = CTy.getName(); uint64_t Size = CTy.getSizeInBits() >> 3; - unsigned Tag = CTy.getTag(); - Buffer.setTag(Tag); + uint16_t Tag = Buffer.getTag(); switch (Tag) { case dwarf::DW_TAG_array_type: - constructArrayTypeDIE(Buffer, &CTy); + constructArrayTypeDIE(Buffer, CTy); break; - case dwarf::DW_TAG_enumeration_type: { - DIArray Elements = CTy.getTypeArray(); - - // Add enumerators to enumeration type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIE *ElemDie = NULL; - DIDescriptor Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { - ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); - Buffer.addChild(ElemDie); - } - } - DIType DTy = CTy.getTypeDerivedFrom(); - if (DTy.Verify()) { - addType(&Buffer, DTy); - addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1); - } - } + case dwarf::DW_TAG_enumeration_type: + constructEnumTypeDIE(Buffer, CTy); break; case dwarf::DW_TAG_subroutine_type: { - // Add return type. + // Add return type. A void return won't have a type. DIArray Elements = CTy.getTypeArray(); - DIDescriptor RTy = Elements.getElement(0); - addType(&Buffer, DIType(RTy)); + DIType RTy(Elements.getElement(0)); + if (RTy) + addType(&Buffer, RTy); bool isPrototyped = true; // Add arguments. for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Ty = Elements.getElement(i); if (Ty.isUnspecifiedParameter()) { - DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); - Buffer.addChild(Arg); + createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); isPrototyped = false; } else { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); addType(Arg, DIType(Ty)); if (DIType(Ty).isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); - Buffer.addChild(Arg); } } // Add prototype flag if we're dealing with a C language and the // function has been prototyped. + uint16_t Language = getLanguage(); if (isPrototyped && - (Language == dwarf::DW_LANG_C89 || - Language == dwarf::DW_LANG_C99 || + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(&Buffer, dwarf::DW_AT_prototyped); - } - break; + } break; case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: { // Add elements to structure type. DIArray Elements = CTy.getTypeArray(); - - // A forward struct declared type may not have elements available. - unsigned N = Elements.getNumElements(); - if (N == 0) - break; - - // Add elements to structure type. 
- for (unsigned i = 0; i < N; ++i) { + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); DIE *ElemDie = NULL; if (Element.isSubprogram()) { DISubprogram SP(Element); - ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); + ElemDie = getOrCreateSubprogramDIE(SP); if (SP.isProtected()) addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); @@ -956,21 +1210,23 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { dwarf::DW_ACCESS_private); else addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_public); + dwarf::DW_ACCESS_public); if (SP.isExplicit()) addFlag(ElemDie, dwarf::DW_AT_explicit); } else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); if (DDTy.getTag() == dwarf::DW_TAG_friend) { - ElemDie = new DIE(dwarf::DW_TAG_friend); - addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); - } else if (DDTy.isStaticMember()) - ElemDie = createStaticMemberDIE(DDTy); - else - ElemDie = createMemberDIE(DDTy); + ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); + addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()), + dwarf::DW_AT_friend); + } else if (DDTy.isStaticMember()) { + getOrCreateStaticMemberDIE(DDTy); + } else { + constructMemberDIE(Buffer, DDTy); + } } else if (Element.isObjCProperty()) { DIObjCProperty Property(Element); - ElemDie = new DIE(Property.getTag()); + ElemDie = createAndAddDIE(Property.getTag(), Buffer); StringRef PropertyName = Property.getObjCPropertyName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); addType(ElemDie, Property.getType()); @@ -995,8 +1251,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (Property.isNonAtomicObjCProperty()) PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; if (PropertyAttributes) - addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0, - PropertyAttributes); + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, + PropertyAttributes); DIEEntry *Entry = getDIEEntry(Element); if (!Entry) { @@ -1005,20 +1261,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } } else continue; - Buffer.addChild(ElemDie); } if (CTy.isAppleBlockExtension()) addFlag(&Buffer, dwarf::DW_AT_APPLE_block); - DICompositeType ContainingType = CTy.getContainingType(); - if (DIDescriptor(ContainingType).isCompositeType()) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DIType(ContainingType))); - else { - DIDescriptor Context = CTy.getContext(); - addToContextOwner(&Buffer, Context); - } + DICompositeType ContainingType(resolve(CTy.getContainingType())); + if (ContainingType) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, + getOrCreateTypeDIE(ContainingType)); if (CTy.isObjcClassComplete()) addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type); @@ -1026,8 +1277,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. 
if (Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || - Tag == dwarf::DW_TAG_union_type) + Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) addTemplateParams(Buffer, CTy.getTemplateParams()); break; @@ -1041,16 +1291,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addString(&Buffer, dwarf::DW_AT_name, Name); if (Tag == dwarf::DW_TAG_enumeration_type || - Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) // TODO: Do we care about size for enum forward declarations? if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); else if (!CTy.isForwardDecl()) // Add zero size if it is not a forward declaration. - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, 0); // If we're a forward decl, say so. if (CTy.isForwardDecl()) @@ -1063,117 +1312,128 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // No harm in adding the runtime language to the declaration. unsigned RLang = CTy.getRunTimeLang(); if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, + RLang); } + // If this is a type applicable to a type unit it then add it to the + // list of types we'll compute a hash for later. + if (shouldCreateTypeUnit(CTy, DD)) + DD->addTypeUnitType(&Buffer); } -/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateTypeParameter. -DIE * -CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { - DIE *ParamDIE = getDIE(TP); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); - addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); - return ParamDIE; +/// constructTemplateTypeParameterDIE - Construct new DIE for the given +/// DITemplateTypeParameter. +void +CompileUnit::constructTemplateTypeParameterDIE(DIE &Buffer, + DITemplateTypeParameter TP) { + DIE *ParamDIE = + createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); + // Add the type if it exists, it could be void and therefore no type. + if (TP.getType()) + addType(ParamDIE, resolve(TP.getType())); + if (!TP.getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); } -/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateValueParameter. -DIE * -CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){ - DIE *ParamDIE = getDIE(TPV); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); - addType(ParamDIE, TPV.getType()); - if (!TPV.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, TPV.getName()); - addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - TPV.getValue()); - return ParamDIE; +/// constructTemplateValueParameterDIE - Construct new DIE for the given +/// DITemplateValueParameter. 
+void +CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer, + DITemplateValueParameter VP) { + DIE *ParamDIE = createAndAddDIE(VP.getTag(), Buffer); + + // Add the type if there is one, template template and template parameter + // packs will not have a type. + if (VP.getTag() == dwarf::DW_TAG_template_value_parameter) + addType(ParamDIE, resolve(VP.getType())); + if (!VP.getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, VP.getName()); + if (Value *Val = VP.getValue()) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) + addConstantValue(ParamDIE, CI, + isUnsignedDIType(DD, resolve(VP.getType()))); + else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) { + // For declaration non-type template parameters (such as global values and + // functions) + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + addOpAddress(Block, Asm->getSymbol(GV)); + // Emit DW_OP_stack_value to use the address as the immediate value of the + // parameter, rather than a pointer to it. + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); + addBlock(ParamDIE, dwarf::DW_AT_location, Block); + } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) { + assert(isa<MDString>(Val)); + addString(ParamDIE, dwarf::DW_AT_GNU_template_name, + cast<MDString>(Val)->getString()); + } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + assert(isa<MDNode>(Val)); + DIArray A(cast<MDNode>(Val)); + addTemplateParams(*ParamDIE, A); + } + } } /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(NS.getContext()); + DIE *NDie = getDIE(NS); if (NDie) return NDie; - NDie = new DIE(dwarf::DW_TAG_namespace); - insertDIE(NS, NDie); + NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); + if (!NS.getName().empty()) { addString(NDie, dwarf::DW_AT_name, NS.getName()); addAccelNamespace(NS.getName(), NDie); + addGlobalName(NS.getName(), NDie, NS.getContext()); } else addAccelNamespace("(anonymous namespace)", NDie); addSourceLine(NDie, NS); - addToContextOwner(NDie, NS.getContext()); return NDie; } -/// getRealLinkageName - If special LLVM prefix that is used to inform the asm -/// printer to not emit usual symbol prefix before the symbol name is used then -/// return linkage name after skipping this special LLVM prefix. -static StringRef getRealLinkageName(StringRef LinkageName) { - char One = '\1'; - if (LinkageName.startswith(StringRef(&One, 1))) - return LinkageName.substr(1); - return LinkageName; -} - /// getOrCreateSubprogramDIE - Create new DIE using SP. DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE (as is the case for member function + // declarations). + DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + DIE *SPDie = getDIE(SP); if (SPDie) return SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (SPDecl.isSubprogram()) + // Add subprogram definitions to the CU die directly. + ContextDIE = CUDie.get(); // DW_TAG_inlined_subroutine may refer to this DIE. 
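Both getOrCreateNameSpace above and getOrCreateSubprogramDIE (whose hunk continues just below) now construct the context DIE before consulting the MDNode-to-DIE map, because building a context such as a class can itself create the DIE being requested, e.g. a member function declaration. The sketch below models that ordering with a plain std::unordered_map; DieCache, buildContext, and the integer keys are stand-ins for illustration, not LLVM API.

    #include <unordered_map>

    struct DIE {};

    struct DieCache {
      std::unordered_map<int, DIE *> Map; // keyed by a metadata-node id

      DIE *lookup(int Key) const {
        auto It = Map.find(Key);
        return It == Map.end() ? nullptr : It->second;
      }

      // Stand-in for getOrCreateContextDIE: in the real code this may
      // recursively create DIEs for the context's members and register
      // them in the map.
      void buildContext(int /*ContextKey*/) {}

      DIE *getOrCreate(int Key, int ContextKey) {
        // 1) Build the context first; doing so may already create Key's DIE.
        buildContext(ContextKey);
        // 2) Only now is the cache lookup reliable.
        if (DIE *D = lookup(Key))
          return D;
        DIE *D = new DIE();
        Map.emplace(Key, D);
        return D;
      }
    };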
- insertDIE(SP, SPDie); + SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); - DISubprogram SPDecl = SP.getFunctionDeclaration(); DIE *DeclDie = NULL; - if (SPDecl.isSubprogram()) { + if (SPDecl.isSubprogram()) DeclDie = getOrCreateSubprogramDIE(SPDecl); - } - - // Add to context owner. - addToContextOwner(SPDie, SP.getContext()); // Add function template parameters. addTemplateParams(*SPDie, SP.getTemplateParams()); - // Unfortunately this code needs to stay here instead of below the - // AT_specification code in order to work around a bug in older - // gdbs that requires the linkage name to resolve multiple template - // functions. - // TODO: Remove this set of code when we get rid of the old gdb - // compatibility. - StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty() && DD->useDarwinGDBCompat()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); - // If this DIE is going to refer declaration info using AT_specification // then there is no need to add other attributes. if (DeclDie) { // Refer function declaration directly. - addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - DeclDie); + addDIEEntry(SPDie, dwarf::DW_AT_specification, DeclDie); return SPDie; } // Add the linkage name if we have one. - if (!LinkageName.empty() && !DD->useDarwinGDBCompat()) + StringRef LinkageName = SP.getLinkageName(); + if (!LinkageName.empty()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); + GlobalValue::getRealLinkageName(LinkageName)); // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) @@ -1183,31 +1443,31 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add the prototype if we have a prototype and we have a C like // language. + uint16_t Language = getLanguage(); if (SP.isPrototyped() && - (Language == dwarf::DW_LANG_C89 || - Language == dwarf::DW_LANG_C99 || + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); - // Add Return Type. DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); + assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type && + "the type of a subprogram should be a subroutine"); - if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) - addType(SPDie, SPTy); - else + DIArray Args = SPTy.getTypeArray(); + // Add a return type. If this is a type like a C/C++ void type we don't add a + // return type. + if (Args.getElement(0)) addType(SPDie, DIType(Args.getElement(0))); unsigned VK = SP.getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIEBlock *Block = getDIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); - addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); - ContainingTypeMap.insert(std::make_pair(SPDie, - SP.getContainingType())); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(Block, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); + ContainingTypeMap.insert( + std::make_pair(SPDie, resolve(SP.getContainingType()))); } if (!SP.isDefinition()) { @@ -1215,19 +1475,13 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add arguments. Do not add arguments for subprogram definition. 
They will // be handled while processing variables. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(Args.getElement(i)); - addType(Arg, ATy); - if (ATy.isArtificial()) - addFlag(Arg, dwarf::DW_AT_artificial); - SPDie->addChild(Arg); - } + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie); + DIType ATy(Args.getElement(i)); + addType(Arg, ATy); + if (ATy.isArtificial()) + addFlag(Arg, dwarf::DW_AT_artificial); + } } if (SP.isArtificial()) @@ -1274,16 +1528,16 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) { } /// createGlobalVariableDIE - create global variable DIE. -void CompileUnit::createGlobalVariableDIE(const MDNode *N) { +void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { + // Check for pre-existence. - if (getDIE(N)) + if (getDIE(GV)) return; - DIGlobalVariable GV(N); - if (!GV.Verify()) + if (!GV.isGlobalVariable()) return; - DIDescriptor GVContext = GV.getContext(); + DIScope GVContext = GV.getContext(); DIType GTy = GV.getType(); // If this is a static data member definition, some attributes belong @@ -1294,35 +1548,30 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (SDMDecl.Verify()) { assert(SDMDecl.isStaticMember() && "Expected static member decl"); // We need the declaration DIE that is in the static member's class. - // But that class might not exist in the DWARF yet. - // Creating the class will create the static member decl DIE. - getOrCreateContextDIE(SDMDecl.getContext()); - VariableDIE = getDIE(SDMDecl); - assert(VariableDIE && "Static member decl has no context?"); + VariableDIE = getOrCreateStaticMemberDIE(SDMDecl); IsStaticMember = true; } // If this is not a static data member definition, create the variable // DIE and add the initial set of attributes to it. if (!VariableDIE) { - VariableDIE = new DIE(GV.getTag()); + // Construct the context before querying for the existence of the DIE in + // case such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(GVContext); + // Add to map. - insertDIE(N, VariableDIE); + VariableDIE = createAndAddDIE(GV.getTag(), *ContextDIE, GV); // Add name and type. addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); addType(VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) { + if (!GV.isLocalToUnit()) addFlag(VariableDIE, dwarf::DW_AT_external); - addGlobalName(GV.getName(), VariableDIE); - } // Add line number info. addSourceLine(VariableDIE, GV); - // Add to context owner. - addToContextOwner(VariableDIE, GVContext); } // Add location. @@ -1332,57 +1581,73 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal())); + const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); + if (GV.getGlobal()->isThreadLocal()) { + // FIXME: Make this work with -gsplit-dwarf. 
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + const MCExpr *Expr = + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(Block, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(Block, dwarf::DW_FORM_udata, Expr); + } else { + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(Block, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr)); + } + // 3) followed by a custom OP to make the debugger do a TLS lookup. + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + } else + addOpAddress(Block, Sym); // Do not create specification DIE if context is either compile unit // or a subprogram. if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && - !GVContext.isFile() && !isSubprogramContext(GVContext)) { + !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) { // Create specification DIE. - VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); - addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, - dwarf::DW_FORM_ref4, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *CUDie); + addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, VariableDIE); + addBlock(VariableSpecDIE, dwarf::DW_AT_location, Block); // A static member's declaration is already flagged as such. if (!SDMDecl.Verify()) addFlag(VariableDIE, dwarf::DW_AT_declaration); - addDie(VariableSpecDIE); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + addBlock(VariableDIE, dwarf::DW_AT_location, Block); } - // Add linkage name. + // Add the linkage name. StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty()) { + if (!LinkageName.empty()) // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and // TAG_variable. - addString(IsStaticMember && VariableSpecDIE ? - VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); - // In compatibility mode with older gdbs we put the linkage name on both - // the TAG_variable DIE and on the TAG_member DIE. - if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat()) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); - } + addString(IsStaticMember && VariableSpecDIE ? VariableSpecDIE + : VariableDIE, + dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::getRealLinkageName(LinkageName)); } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV.getConstant())) { + dyn_cast_or_null<ConstantInt>(GV.getConstant())) { // AT_const_value was added when the static member was created. To avoid // emitting AT_const_value multiple times, we only add AT_const_value when // it is not a static member. if (!IsStaticMember) - addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); - } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + addConstantValue(VariableDIE, CI, isUnsignedDIType(DD, GTy)); + } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) { addToAccelTable = true; // GV is a merged global. 
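For thread-local globals, the hunk above emits a location expression of the shape DW_OP_const4u/const8u <offset> DW_OP_GNU_push_tls_address, leaving the actual lookup to the debugger. A simplified standalone encoder follows; it assumes the TLS-block offset is already a resolved integer, whereas the real emitter streams a relocatable MCExpr there, and the opcode values used are the standard and GNU-extension ones (0x0c, 0x0e, 0xe0).

    #include <cstdint>
    #include <vector>

    constexpr uint8_t DW_OP_const4u = 0x0c;
    constexpr uint8_t DW_OP_const8u = 0x0e;
    constexpr uint8_t DW_OP_GNU_push_tls_address = 0xe0;

    // Location expression for a TLS variable at `offset` bytes into the
    // module's TLS block, for a 4- or 8-byte pointer target.
    std::vector<uint8_t> tlsLocation(uint64_t offset, unsigned pointerSize) {
      std::vector<uint8_t> expr;
      expr.push_back(pointerSize == 4 ? DW_OP_const4u : DW_OP_const8u);
      for (unsigned i = 0; i < pointerSize; ++i)  // little-endian operand
        expr.push_back(uint8_t(offset >> (8 * i)));
      expr.push_back(DW_OP_GNU_push_tls_address); // debugger does the lookup
      return expr;
    }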
DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); Value *Ptr = CE->getOperand(0); - addOpAddress(Block, Asm->Mang->getSymbol(cast<GlobalValue>(Ptr))); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end()); - addUInt(Block, 0, dwarf::DW_FORM_udata, - Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + addOpAddress(Block, Asm->getSymbol(cast<GlobalValue>(Ptr))); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end()); + addUInt(Block, dwarf::DW_FORM_udata, + Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(VariableDIE, dwarf::DW_AT_location, Block); } if (addToAccelTable) { @@ -1395,14 +1660,16 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addAccelName(GV.getLinkageName(), AddrDIE); } - return; + if (!GV.isLocalToUnit()) + addGlobalName(GV.getName(), VariableSpecDIE ? VariableSpecDIE : VariableDIE, + GV.getContext()); } /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { - DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + DIE *DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); + addDIEEntry(DW_Subrange, dwarf::DW_AT_type, IndexTy); // The LowerBound value defines the lower bounds which is typically zero for // C/C++. The Count value is the number of elements. Values are 64 bit. If @@ -1415,26 +1682,22 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, int64_t Count = SR.getCount(); if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) - addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound); + addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); if (Count != -1 && Count != 0) // FIXME: An unbounded array should reference the expression that defines // the array. - addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1); - - Buffer.addChild(DW_Subrange); + addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, None, + LowerBound + Count - 1); } /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void CompileUnit::constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy) { - Buffer.setTag(dwarf::DW_TAG_array_type); - if (CTy->isVector()) +void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { + if (CTy.isVector()) addFlag(&Buffer, dwarf::DW_AT_GNU_vector); - // Emit derived type. - addType(&Buffer, CTy->getTypeDerivedFrom()); - DIArray Elements = CTy->getTypeArray(); + // Emit the element type. + addType(&Buffer, resolve(CTy.getTypeDerivedFrom())); // Get an anonymous type for index type. // FIXME: This type should be passed down from the front end @@ -1442,16 +1705,16 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DIE *IdxTy = getIndexTyDie(); if (!IdxTy) { // Construct an anonymous type for index type. 
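The constructSubrangeDIE hunk above only emits DW_AT_lower_bound when it differs from the language's default (0 for C/C++, 1 for Fortran), and derives DW_AT_upper_bound as LowerBound + Count - 1, skipping it for unknown (-1) or empty counts. A standalone model of that decision; the struct and function names are illustrative.

    #include <cstdint>
    #include <optional>

    struct SubrangeAttrs {
      std::optional<int64_t> lowerBound; // DW_AT_lower_bound, if emitted
      std::optional<int64_t> upperBound; // DW_AT_upper_bound, if emitted
    };

    // defaultLower is the language's default lower bound, or -1 when the
    // DWARF version defines none for this language.
    SubrangeAttrs subrangeAttrs(int64_t lower, int64_t count,
                                int64_t defaultLower) {
      SubrangeAttrs A;
      if (defaultLower == -1 || lower != defaultLower)
        A.lowerBound = lower; // omit when the default already implies it
      if (count != -1 && count != 0) // unknown or empty: no upper bound
        A.upperBound = lower + count - 1;
      return A;
    }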
- IdxTy = new DIE(dwarf::DW_TAG_base_type); + IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *CUDie.get()); addString(IdxTy, dwarf::DW_AT_name, "int"); - addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); + addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int32_t)); addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::DW_ATE_signed); - addDie(IdxTy); setIndexTyDie(IdxTy); } // Add subranges to array type. + DIArray Elements = CTy.getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.getTag() == dwarf::DW_TAG_subrange_type) @@ -1459,195 +1722,183 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, } } -/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. -DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) { - DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, Name); - int64_t Value = ETy.getEnumValue(); - addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); - return Enumerator; +/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType. +void CompileUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIEnumerator Enum(Elements.getElement(i)); + if (Enum.isEnumerator()) { + DIE *Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); + StringRef Name = Enum.getName(); + addString(Enumerator, dwarf::DW_AT_name, Name); + int64_t Value = Enum.getEnumValue(); + addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + } + } + DIType DTy = resolve(CTy.getTypeDerivedFrom()); + if (DTy) { + addType(&Buffer, DTy); + addFlag(&Buffer, dwarf::DW_AT_enum_class); + } } /// constructContainingTypeDIEs - Construct DIEs for types that contain /// vtables. void CompileUnit::constructContainingTypeDIEs() { for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), - CE = ContainingTypeMap.end(); CI != CE; ++CI) { + CE = ContainingTypeMap.end(); + CI != CE; ++CI) { DIE *SPDie = CI->first; - const MDNode *N = CI->second; - if (!N) continue; - DIE *NDie = getDIE(N); - if (!NDie) continue; - addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + DIDescriptor D(CI->second); + if (!D) + continue; + DIE *NDie = getDIE(D); + if (!NDie) + continue; + addDIEEntry(SPDie, dwarf::DW_AT_containing_type, NDie); } } /// constructVariableDIE - Construct a DIE for the given DbgVariable. -DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { - StringRef Name = DV->getName(); - - // Translate tag to proper Dwarf tag. - unsigned Tag = DV->getTag(); +DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) { + StringRef Name = DV.getName(); // Define variable debug information entry. - DIE *VariableDie = new DIE(Tag); - DbgVariable *AbsVar = DV->getAbstractVariable(); + DIE *VariableDie = new DIE(DV.getTag()); + DbgVariable *AbsVar = DV.getAbstractVariable(); DIE *AbsDIE = AbsVar ? 
AbsVar->getDIE() : NULL; if (AbsDIE) - addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsDIE); + addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, Name); - addSourceLine(VariableDie, DV->getVariable()); - addType(VariableDie, DV->getType()); + if (!Name.empty()) + addString(VariableDie, dwarf::DW_AT_name, Name); + addSourceLine(VariableDie, DV.getVariable()); + addType(VariableDie, DV.getType()); } - if (DV->isArtificial()) + if (DV.isArtificial()) addFlag(VariableDie, dwarf::DW_AT_artificial); if (isScopeAbstract) { - DV->setDIE(VariableDie); + DV.setDIE(VariableDie); return VariableDie; } // Add variable address. - unsigned Offset = DV->getDotDebugLocOffset(); + unsigned Offset = DV.getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, - dwarf::DW_FORM_data4, - Asm->GetTempSymbol("debug_loc", Offset)); - DV->setDIE(VariableDie); + addSectionLabel(VariableDie, dwarf::DW_AT_location, + Asm->GetTempSymbol("debug_loc", Offset)); + DV.setDIE(VariableDie); return VariableDie; } // Check if variable is described by a DBG_VALUE instruction. - if (const MachineInstr *DVInsn = DV->getMInsn()) { - bool updated = false; - if (DVInsn->getNumOperands() == 3) { - if (DVInsn->getOperand(0).isReg()) { - const MachineOperand RegOp = DVInsn->getOperand(0); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - if (DVInsn->getOperand(1).isImm() && - TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { - unsigned FrameReg = 0; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, - DVInsn->getOperand(1).getImm(), - FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, VariableDie, Location); - - } else if (RegOp.getReg()) - addVariableAddress(DV, VariableDie, - MachineLocation(RegOp.getReg())); - updated = true; - } - else if (DVInsn->getOperand(0).isImm()) - updated = - addConstantValue(VariableDie, DVInsn->getOperand(0), - DV->getType()); - else if (DVInsn->getOperand(0).isFPImm()) - updated = - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isCImm()) - updated = - addConstantValue(VariableDie, - DVInsn->getOperand(0).getCImm(), - DV->getType().isUnsignedDIType()); - } else { - addVariableAddress(DV, VariableDie, - Asm->getDebugValueLocation(DVInsn)); - updated = true; - } - if (!updated) { - // If variableDie is not updated then DBG_VALUE instruction does not - // have valid variable info. - delete VariableDie; - return NULL; - } - DV->setDIE(VariableDie); + if (const MachineInstr *DVInsn = DV.getMInsn()) { + assert(DVInsn->getNumOperands() == 3); + if (DVInsn->getOperand(0).isReg()) { + const MachineOperand RegOp = DVInsn->getOperand(0); + // If the second operand is an immediate, this is an indirect value. 
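As the comment above notes and the hunk just below implements, a register operand paired with an immediate describes a value in memory at register + offset, while a bare register means the value lives in the register itself. In DWARF expression terms that is the difference between DW_OP_bregN <sleb128 offset> and DW_OP_regN. A standalone encoder for that choice, assuming DWARF register numbers below 32 (the compact one-byte opcode range); the helper names are invented for illustration.

    #include <cstdint>
    #include <vector>

    // One-byte opcode bases (DWARF4): DW_OP_reg0 = 0x50, DW_OP_breg0 = 0x70.
    constexpr uint8_t DW_OP_reg0  = 0x50;
    constexpr uint8_t DW_OP_breg0 = 0x70;

    // Append a signed LEB128 value, as DW_OP_bregN's operand requires.
    void appendSLEB128(std::vector<uint8_t> &out, int64_t v) {
      bool more = true;
      while (more) {
        uint8_t byte = v & 0x7f;
        v >>= 7;
        more = !((v == 0 && !(byte & 0x40)) || (v == -1 && (byte & 0x40)));
        if (more)
          byte |= 0x80;
        out.push_back(byte);
      }
    }

    // Value held in the register itself -> DW_OP_regN; value in memory at
    // reg + offset -> DW_OP_bregN followed by the SLEB128 offset.
    std::vector<uint8_t> simpleLocation(unsigned dwarfReg, bool indirect,
                                        int64_t offset) {
      std::vector<uint8_t> expr;
      if (!indirect) {
        expr.push_back(DW_OP_reg0 + dwarfReg); // assumes dwarfReg < 32
      } else {
        expr.push_back(DW_OP_breg0 + dwarfReg);
        appendSLEB128(expr, offset);
      }
      return expr;
    }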
+ if (DVInsn->getOperand(1).isImm()) { + MachineLocation Location(RegOp.getReg(), + DVInsn->getOperand(1).getImm()); + addVariableAddress(DV, VariableDie, Location); + } else if (RegOp.getReg()) + addVariableAddress(DV, VariableDie, MachineLocation(RegOp.getReg())); + } else if (DVInsn->getOperand(0).isImm()) + addConstantValue(VariableDie, DVInsn->getOperand(0), DV.getType()); + else if (DVInsn->getOperand(0).isFPImm()) + addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isCImm()) + addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(), + isUnsignedDIType(DD, DV.getType())); + + DV.setDIE(VariableDie); return VariableDie; } else { // .. else use frame index. - int FI = DV->getFrameIndex(); + int FI = DV.getFrameIndex(); if (FI != ~0) { unsigned FrameReg = 0; const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); MachineLocation Location(FrameReg, Offset); addVariableAddress(DV, VariableDie, Location); } } - DV->setDIE(VariableDie); + DV.setDIE(VariableDie); return VariableDie; } -/// createMemberDIE - Create new member DIE. -DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { - DIE *MemberDie = new DIE(DT.getTag()); +/// constructMemberDIE - Construct member DIE from DIDerivedType. +void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { + DIE *MemberDie = createAndAddDIE(DT.getTag(), Buffer); StringRef Name = DT.getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); - addType(MemberDie, DT.getTypeDerivedFrom()); + addType(MemberDie, resolve(DT.getTypeDerivedFrom())); addSourceLine(MemberDie, DT); - DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - - uint64_t Size = DT.getSizeInBits(); - uint64_t FieldSize = DT.getOriginalTypeSize(); - - if (Size != FieldSize) { - // Handle bitfield. - addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); - addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); - - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); - uint64_t HiMark = (Offset + FieldSize) & AlignMask; - uint64_t FieldOffset = (HiMark - FieldSize); - Offset -= FieldOffset; - - // Maybe we need to work from the other end. - if (Asm->getDataLayout().isLittleEndian()) - Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); - - // Here WD_AT_data_member_location points to the anonymous - // field that includes this bit field. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); - - } else - // This is not a bitfield. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); - - if (DT.getTag() == dwarf::DW_TAG_inheritance - && DT.isVirtual()) { + if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) { // For C++, virtual base classes are not at fixed offset. Use following // expression to extract appropriate offset from vtable. 
// BaseAddr = ObAddr + *((*ObAddr) - Offset) DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits()); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, - VBaseLocationDie); - } else - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + + addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); + } else { + uint64_t Size = DT.getSizeInBits(); + uint64_t FieldSize = getBaseTypeSize(DD, DT); + uint64_t OffsetInBytes; + + if (Size != FieldSize) { + // Handle bitfield. + addUInt(MemberDie, dwarf::DW_AT_byte_size, None, + getBaseTypeSize(DD, DT) >> 3); + addUInt(MemberDie, dwarf::DW_AT_bit_size, None, DT.getSizeInBits()); + + uint64_t Offset = DT.getOffsetInBits(); + uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t HiMark = (Offset + FieldSize) & AlignMask; + uint64_t FieldOffset = (HiMark - FieldSize); + Offset -= FieldOffset; + + // Maybe we need to work from the other end. + if (Asm->getDataLayout().isLittleEndian()) + Offset = FieldSize - (Offset + Size); + addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset); + + // Here WD_AT_data_member_location points to the anonymous + // field that includes this bit field. + OffsetInBytes = FieldOffset >> 3; + } else + // This is not a bitfield. + OffsetInBytes = DT.getOffsetInBits() >> 3; + + if (DD->getDwarfVersion() <= 2) { + DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); + addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); + addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); + } else + addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, + OffsetInBytes); + } if (DT.isProtected()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, @@ -1671,17 +1922,26 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { if (DT.isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); - - return MemberDie; } -/// createStaticMemberDIE - Create new DIE for C++ static member. -DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) { +/// getOrCreateStaticMemberDIE - Create new DIE for C++ static member. +DIE *CompileUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { if (!DT.Verify()) return NULL; - DIE *StaticMemberDIE = new DIE(DT.getTag()); - DIType Ty = DT.getTypeDerivedFrom(); + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. 
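The bitfield branch of constructMemberDIE above translates LLVM's storage-unit offsets into the DWARF 2 DW_AT_bit_offset convention, which counts from the most significant bit of the containing field, hence the mirroring on little-endian targets. A standalone version of that arithmetic follows, with all sizes and offsets in bits, power-of-two alignment assumed (as in the original), and illustrative names.

    #include <cstdint>

    struct BitFieldAttrs {
      uint64_t byteSize;          // DW_AT_byte_size of the underlying field
      uint64_t bitSize;           // DW_AT_bit_size
      uint64_t bitOffset;         // DW_AT_bit_offset, counted from the MSB
      uint64_t memberOffsetBytes; // DW_AT_data_member_location
    };

    BitFieldAttrs bitFieldAttrs(uint64_t sizeInBits, uint64_t fieldSizeInBits,
                                uint64_t offsetInBits, uint64_t alignInBits,
                                bool littleEndian) {
      // Round the bitfield's start down to the aligned storage unit that
      // holds it; that unit is what DW_AT_data_member_location points at.
      uint64_t alignMask = ~(alignInBits - 1);
      uint64_t hiMark = (offsetInBits + fieldSizeInBits) & alignMask;
      uint64_t fieldOffset = hiMark - fieldSizeInBits;
      uint64_t offset = offsetInBits - fieldOffset;

      // DWARF 2 counts the offset from the most significant bit, so flip
      // it on little-endian targets.
      if (littleEndian)
        offset = fieldSizeInBits - (offset + sizeInBits);

      return {fieldSizeInBits >> 3, sizeInBits, offset, fieldOffset >> 3};
    }

The same hunk also emits DW_AT_data_member_location as a bare integer for DWARF 3 and later, reserving the DW_OP_plus_uconst expression block for DWARF 2 consumers.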
+ DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext())); + assert(dwarf::isType(ContextDIE->getTag()) && + "Static member should belong to a type."); + + DIE *StaticMemberDIE = getDIE(DT); + if (StaticMemberDIE) + return StaticMemberDIE; + + StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); + + DIType Ty = resolve(DT.getTypeDerivedFrom()); addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName()); addType(StaticMemberDIE, Ty); @@ -1702,10 +1962,20 @@ DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) { dwarf::DW_ACCESS_public); if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant())) - addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType()); + addConstantValue(StaticMemberDIE, CI, isUnsignedDIType(DD, Ty)); if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant())) addConstantFPValue(StaticMemberDIE, CFP); - insertDIE(DT, StaticMemberDIE); return StaticMemberDIE; } + +void CompileUnit::emitHeader(const MCSection *ASection, + const MCSymbol *ASectionSym) { + Asm->OutStreamer.AddComment("DWARF version number"); + Asm->EmitInt16(DD->getDwarfVersion()); + Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); + Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), + ASectionSym); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 8f08c63..69a96df 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -15,15 +15,16 @@ #define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #include "DIE.h" +#include "DwarfDebug.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringMap.h" #include "llvm/DebugInfo.h" +#include "llvm/MC/MCExpr.h" namespace llvm { -class DwarfDebug; -class DwarfUnits; class MachineLocation; class MachineOperand; class ConstantInt; @@ -38,11 +39,10 @@ class CompileUnit { /// unsigned UniqueID; - /// Language - The DW_AT_language of the compile unit - /// - unsigned Language; + /// Node - MDNode for the compile unit. + DICompileUnit Node; - /// Die - Compile unit debug information entry. + /// CUDie - Compile unit debug information entry. /// const OwningPtr<DIE> CUDie; @@ -56,28 +56,28 @@ class CompileUnit { /// IndexTyDie - An anonymous type for index type. Owned by CUDie. DIE *IndexTyDie; - /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton + /// MDNodeToDieMap - Tracks the mapping of unit level debug information /// variables to debug information entries. DenseMap<const MDNode *, DIE *> MDNodeToDieMap; - /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton + /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information /// descriptors to debug information entries using a DIEEntry proxy. DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; /// GlobalNames - A map of globally visible named entities for this unit. /// - StringMap<DIE*> GlobalNames; + StringMap<DIE *> GlobalNames; /// GlobalTypes - A map of globally visible types for this unit. /// - StringMap<DIE*> GlobalTypes; + StringMap<DIE *> GlobalTypes; /// AccelNames - A map of names for the name accelerator table. 
/// - StringMap<std::vector<DIE*> > AccelNames; - StringMap<std::vector<DIE*> > AccelObjC; - StringMap<std::vector<DIE*> > AccelNamespace; - StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes; + StringMap<std::vector<DIE *> > AccelNames; + StringMap<std::vector<DIE *> > AccelObjC; + StringMap<std::vector<DIE *> > AccelNamespace; + StringMap<std::vector<std::pair<DIE *, unsigned> > > AccelTypes; /// DIEBlocks - A list of all the DIEBlocks in use. std::vector<DIEBlock *> DIEBlocks; @@ -87,163 +87,161 @@ class CompileUnit { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; - /// Offset of the CUDie from beginning of debug info section. - unsigned DebugInfoOffset; + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; - /// getLowerBoundDefault - Return the default lower bound for an array. If the - /// DWARF version doesn't handle the language, return -1. - int64_t getDefaultLowerBound() const; + // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently. + DIEInteger *DIEIntegerOne; public: - CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW, - DwarfUnits *); + CompileUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A, + DwarfDebug *DW, DwarfUnits *DWU); ~CompileUnit(); // Accessors. - unsigned getUniqueID() const { return UniqueID; } - unsigned getLanguage() const { return Language; } - DIE* getCUDie() const { return CUDie.get(); } - unsigned getDebugInfoOffset() const { return DebugInfoOffset; } - const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; } - const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } - - const StringMap<std::vector<DIE*> > &getAccelNames() const { + unsigned getUniqueID() const { return UniqueID; } + uint16_t getLanguage() const { return Node.getLanguage(); } + DICompileUnit getNode() const { return Node; } + DIE *getCUDie() const { return CUDie.get(); } + const StringMap<DIE *> &getGlobalNames() const { return GlobalNames; } + const StringMap<DIE *> &getGlobalTypes() const { return GlobalTypes; } + + const StringMap<std::vector<DIE *> > &getAccelNames() const { return AccelNames; } - const StringMap<std::vector<DIE*> > &getAccelObjC() const { + const StringMap<std::vector<DIE *> > &getAccelObjC() const { return AccelObjC; } - const StringMap<std::vector<DIE*> > &getAccelNamespace() const { + const StringMap<std::vector<DIE *> > &getAccelNamespace() const { return AccelNamespace; } - const StringMap<std::vector<std::pair<DIE*, unsigned > > > - &getAccelTypes() const { + const StringMap<std::vector<std::pair<DIE *, unsigned> > > & + getAccelTypes() const { return AccelTypes; } + unsigned getDebugInfoOffset() const { return DebugInfoOffset; } void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } + /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } + /// getParentContextString - Get a string containing the language specific + /// context for a global name. + std::string getParentContextString(DIScope Context) const; + /// addGlobalName - Add a new global entity to the compile unit. /// - void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; } + void addGlobalName(StringRef Name, DIE *Die, DIScope Context); /// addGlobalType - Add a new global type to the compile unit. 
/// void addGlobalType(DIType Ty); + /// addPubTypes - Add a set of types from the subprogram to the global types. + void addPubTypes(DISubprogram SP); /// addAccelName - Add a new name to the name accelerator table. - void addAccelName(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelNames[Name]; - DIEs.push_back(Die); - } - void addAccelObjC(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelObjC[Name]; - DIEs.push_back(Die); - } - void addAccelNamespace(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelNamespace[Name]; - DIEs.push_back(Die); - } - void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { - std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name]; - DIEs.push_back(Die); - } + void addAccelName(StringRef Name, DIE *Die); - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. - DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } + /// addAccelObjC - Add a new name to the ObjC accelerator table. + void addAccelObjC(StringRef Name, DIE *Die); - DIEBlock *getDIEBlock() { - return new (DIEValueAllocator) DIEBlock(); - } + /// addAccelNamespace - Add a new name to the namespace accelerator table. + void addAccelNamespace(StringRef Name, DIE *Die); - /// insertDIE - Insert DIE into the map. - void insertDIE(const MDNode *N, DIE *D) { - MDNodeToDieMap.insert(std::make_pair(N, D)); - } + /// addAccelType - Add a new type to the type accelerator table. + void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die); - /// getDIEEntry - Returns the debug information entry for the specified - /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap<const MDNode *, DIEEntry *>::iterator I = - MDNodeToDIEEntryMap.find(N); - if (I == MDNodeToDIEEntryMap.end()) - return NULL; - return I->second; - } + /// getDIE - Returns the debug information entry map slot for the + /// specified debug variable. We delegate the request to DwarfDebug + /// when the MDNode can be part of the type system, since DIEs for + /// the type system can be shared across CUs and the mappings are + /// kept in DwarfDebug. + DIE *getDIE(DIDescriptor D) const; - /// insertDIEEntry - Insert debug information entry into the map. - void insertDIEEntry(const MDNode *N, DIEEntry *E) { - MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); - } + DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); } + + /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug + /// when the MDNode can be part of the type system, since DIEs for + /// the type system can be shared across CUs and the mappings are + /// kept in DwarfDebug. + void insertDIE(DIDescriptor Desc, DIE *D); /// addDie - Adds or interns the DIE to the compile unit. /// - void addDie(DIE *Buffer) { - this->CUDie->addChild(Buffer); - } - - // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { - return IndexTyDie; - } - - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. - void setIndexTyDie(DIE *D) { - IndexTyDie = D; - } + void addDie(DIE *Buffer) { CUDie->addChild(Buffer); } /// addFlag - Add a flag that is true to the DIE. - void addFlag(DIE *Die, unsigned Attribute); + void addFlag(DIE *Die, dwarf::Attribute Attribute); /// addUInt - Add an unsigned integer attribute data and value. 
/// - void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); + void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, + uint64_t Integer); + + void addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer); /// addSInt - Add an signed integer attribute data and value. /// - void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); + void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, + int64_t Integer); + + void addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, int64_t Integer); /// addString - Add a string attribute data and value. /// - void addString(DIE *Die, unsigned Attribute, const StringRef Str); + void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); /// addLocalString - Add a string attribute data and value. /// - void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str); + void addLocalString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); + + /// addExpr - Add a Dwarf expression attribute data and value. + /// + void addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr); /// addLabel - Add a Dwarf label attribute data and value. /// - void addLabel(DIE *Die, unsigned Attribute, unsigned Form, + void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label); + void addLabel(DIEBlock *Die, dwarf::Form Form, const MCSymbol *Label); + + /// addSectionLabel - Add a Dwarf section label attribute data and value. + /// + void addSectionLabel(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Label); + + /// addSectionOffset - Add an offset into a section attribute data and value. + /// + void addSectionOffset(DIE *Die, dwarf::Attribute Attribute, uint64_t Integer); + /// addLabelAddress - Add a dwarf label attribute data and value using /// either DW_FORM_addr or DW_FORM_GNU_addr_index. /// - void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label); + void addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label); /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// - void addOpAddress(DIE *Die, MCSymbol *Label); + void addOpAddress(DIEBlock *Die, const MCSymbol *Label); - /// addDelta - Add a label delta attribute data and value. + /// addSectionDelta - Add a label delta attribute data and value. + void addSectionDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); + + /// addDIEEntry - Add a DIE attribute data and value. /// - void addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo); + void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry); /// addDIEEntry - Add a DIE attribute data and value. /// - void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); + void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry); /// addBlock - Add block data. /// - void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); + void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block); /// addSourceLine - Add location information to specified debug information /// entry. @@ -256,33 +254,33 @@ public: /// addAddress - Add an address attribute to a die based on the location /// provided. 
- void addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location); + void addAddress(DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location, + bool Indirect = false); /// addConstantValue - Add constant value entry in variable DIE. - bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); - bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); - bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); + void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); + void addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); + void addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, const MachineOperand &MO); - bool addConstantFPValue(DIE *Die, const ConstantFP *CFP); + void addConstantFPValue(DIE *Die, const MachineOperand &MO); + void addConstantFPValue(DIE *Die, const ConstantFP *CFP); /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); /// addRegisterOp - Add register operand. - void addRegisterOp(DIE *TheDie, unsigned Reg); + void addRegisterOp(DIEBlock *TheDie, unsigned Reg); /// addRegisterOffset - Add register offset. - void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset); + void addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset); /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable /// (navigating the extra location information encoded in the type) based on /// the starting location. Add the DWARF information to the die. /// - void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addComplexAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location); // FIXME: Should be reformulated in terms of addComplexAddress. @@ -292,20 +290,18 @@ public: /// starting location. Add the DWARF information to the die. Obsolete, /// please use addComplexAddress instead. /// - void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location); /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. - void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location); - - /// addToContextOwner - Add Die into the list of its context owner's children. - void addToContextOwner(DIE *Die, DIDescriptor Context); + void addVariableAddress(const DbgVariable &DV, DIE *Die, + MachineLocation Location); /// addType - Add a new type attribute to the specified entity. This takes /// and attribute parameter because DW_AT_friend attributes are also /// type references. - void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type); + void addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute = dwarf::DW_AT_type); /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *getOrCreateNameSpace(DINameSpace NS); @@ -317,66 +313,103 @@ public: /// given DIType. DIE *getOrCreateTypeDIE(const MDNode *N); - /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateTypeParameter. - DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); + /// getOrCreateContextDIE - Get context owner's DIE. 
+ DIE *getOrCreateContextDIE(DIScope Context); - /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create - /// new DIE for the given DITemplateValueParameter. - DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); + /// createGlobalVariableDIE - create global variable DIE. + void createGlobalVariableDIE(DIGlobalVariable GV); - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. - DIEEntry *createDIEEntry(DIE *Entry); + /// constructContainingTypeDIEs - Construct DIEs for types that contain + /// vtables. + void constructContainingTypeDIEs(); - /// createGlobalVariableDIE - create global variable DIE. - void createGlobalVariableDIE(const MDNode *N); + /// constructVariableDIE - Construct a DIE for the given DbgVariable. + DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract); + + /// Create a DIE with the given Tag, add the DIE to its parent, and + /// call insertDIE if MD is not null. + DIE *createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N = DIDescriptor()); + + /// Compute the size of a header for this unit, not including the initial + /// length field. + unsigned getHeaderSize() const { + return sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t); // Pointer Size (in bytes) + } - void addPubTypes(DISubprogram SP); + /// Emit the header for this unit, not including the initial length field. + void emitHeader(const MCSection *ASection, const MCSymbol *ASectionSym); +private: /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(DIE &Buffer, - DIBasicType BTy); + void constructTypeDIE(DIE &Buffer, DIBasicType BTy); /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(DIE &Buffer, - DIDerivedType DTy); + void constructTypeDIE(DIE &Buffer, DIDerivedType DTy); /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(DIE &Buffer, - DICompositeType CTy); + void constructTypeDIE(DIE &Buffer, DICompositeType CTy); /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy); + void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy); /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. - DIE *constructEnumTypeDIE(DIEnumerator ETy); + void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy); - /// constructContainingTypeDIEs - Construct DIEs for types that contain - /// vtables. - void constructContainingTypeDIEs(); + /// constructMemberDIE - Construct member DIE from DIDerivedType. + void constructMemberDIE(DIE &Buffer, DIDerivedType DT); - /// constructVariableDIE - Construct a DIE for the given DbgVariable. - DIE *constructVariableDIE(DbgVariable *DV, bool isScopeAbstract); + /// constructTemplateTypeParameterDIE - Construct new DIE for the given + /// DITemplateTypeParameter. + void constructTemplateTypeParameterDIE(DIE &Buffer, + DITemplateTypeParameter TP); - /// createMemberDIE - Create new member DIE. - DIE *createMemberDIE(DIDerivedType DT); + /// constructTemplateValueParameterDIE - Construct new DIE for the given + /// DITemplateValueParameter. 
+ void constructTemplateValueParameterDIE(DIE &Buffer, + DITemplateValueParameter TVP); - /// createStaticMemberDIE - Create new static data member DIE. - DIE *createStaticMemberDIE(DIDerivedType DT); + /// getOrCreateStaticMemberDIE - Create new static data member DIE. + DIE *getOrCreateStaticMemberDIE(DIDerivedType DT); - /// getOrCreateContextDIE - Get context owner's DIE. - DIE *getOrCreateContextDIE(DIDescriptor Context); + /// Offset of the CUDie from beginning of debug info section. + unsigned DebugInfoOffset; -private: + /// getLowerBoundDefault - Return the default lower bound for an array. If the + /// DWARF version doesn't handle the language, return -1. + int64_t getDefaultLowerBound() const; - // DIEValueAllocator - All DIEValues are allocated through this allocator. - BumpPtrAllocator DIEValueAllocator; - DIEInteger *DIEIntegerOne; + /// getDIEEntry - Returns the debug information entry for the specified + /// debug variable. + DIEEntry *getDIEEntry(const MDNode *N) const { + return MDNodeToDIEEntryMap.lookup(N); + } + + /// insertDIEEntry - Insert debug information entry into the map. + void insertDIEEntry(const MDNode *N, DIEEntry *E) { + MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); + } + + // getIndexTyDie - Get an anonymous type for index type. + DIE *getIndexTyDie() { return IndexTyDie; } + + // setIndexTyDie - Set D as anonymous type for index which can be reused + // later. + void setIndexTyDie(DIE *D) { IndexTyDie = D; } + + /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// information entry. + DIEEntry *createDIEEntry(DIE *Entry); + + /// resolve - Look in the DwarfDebug map for the MDNode that + /// corresponds to the reference. + template <typename T> T resolve(DIRef<T> Ref) const { + return DD->resolve(Ref); + } }; } // end llvm namespace diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1e706cc..d1e1ad1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DIEHash.h" #include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "llvm/ADT/STLExtras.h" @@ -34,8 +35,10 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ValueHandle.h" @@ -46,61 +49,69 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", - cl::Hidden, - cl::desc("Disable debug info printing")); +static cl::opt<bool> +DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); -static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, - cl::desc("Make an absence of debug location information explicit."), - cl::init(false)); +static cl::opt<bool> UnknownLocations( + "use-unknown-locations", cl::Hidden, + cl::desc("Make an absence of debug location information explicit."), + cl::init(false)); -static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames", - cl::Hidden, cl::init(false), - cl::desc("Generate DWARF pubnames section")); +static cl::opt<bool> 
+GenerateODRHash("generate-odr-hash", cl::Hidden, + cl::desc("Add an ODR hash to external type DIEs."), + cl::init(false)); -namespace { - enum DefaultOnOff { - Default, Enable, Disable - }; -} - -static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, - cl::desc("Output prototype dwarf accelerator tables."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); - -static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, - cl::desc("Compatibility with Darwin gdb."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); - -static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden, - cl::desc("Output prototype dwarf split debug info."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); +static cl::opt<bool> +GenerateCUHash("generate-cu-hash", cl::Hidden, + cl::desc("Add the CU hash as the dwo_id."), + cl::init(false)); -namespace { - const char *DWARFGroupName = "DWARF Emission"; - const char *DbgTimerName = "DWARF Debug Writer"; +static cl::opt<bool> +GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden, + cl::desc("Generate GNU-style pubnames and pubtypes"), + cl::init(false)); - struct CompareFirst { - template <typename T> bool operator()(const T &lhs, const T &rhs) const { - return lhs.first < rhs.first; - } - }; -} // end anonymous namespace +namespace { +enum DefaultOnOff { + Default, + Enable, + Disable +}; +} + +static cl::opt<DefaultOnOff> +DwarfAccelTables("dwarf-accel-tables", cl::Hidden, + cl::desc("Output prototype dwarf accelerator tables."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +SplitDwarf("split-dwarf", cl::Hidden, + cl::desc("Output prototype dwarf split debug info."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, + cl::desc("Generate DWARF pubnames and pubtypes sections"), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<unsigned> +DwarfVersionNumber("dwarf-version", cl::Hidden, + cl::desc("Generate DWARF for dwarf version."), + cl::init(0)); + +static const char *const DWARFGroupName = "DWARF Emission"; +static const char *const DbgTimerName = "DWARF Debug Writer"; //===----------------------------------------------------------------------===// @@ -110,6 +121,13 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512) namespace llvm { +/// resolve - Look in the DwarfDebug map for the MDNode that +/// corresponds to the reference. 
+template <typename T> +T DbgVariable::resolve(DIRef<T> Ref) const { + return DD->resolve(Ref); +} + DIType DbgVariable::getType() const { DIType Ty = Var.getType(); // FIXME: isBlockByrefVariable should be reformulated in terms of complex @@ -140,21 +158,16 @@ DIType DbgVariable::getType() const { the pointers and __Block_byref_x_VarName struct to find the actual value of the variable. The function addBlockByrefType does this. */ DIType subType = Ty; - unsigned tag = Ty.getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - subType = DTy.getTypeDerivedFrom(); - } + uint16_t tag = Ty.getTag(); - DICompositeType blockStruct = DICompositeType(subType); - DIArray Elements = blockStruct.getTypeArray(); + if (tag == dwarf::DW_TAG_pointer_type) + subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom()); + DIArray Elements = DICompositeType(subType).getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIDerivedType DT = DIDerivedType(Element); + DIDerivedType DT(Elements.getElement(i)); if (getName() == DT.getName()) - return (DT.getTypeDerivedFrom()); + return (resolve(DT.getTypeDerivedFrom())); } } return Ty; @@ -162,15 +175,23 @@ DIType DbgVariable::getType() const { } // end llvm namespace +/// Return Dwarf Version by checking module flags. +static unsigned getDwarfVersionFromModule(const Module *M) { + Value *Val = M->getModuleFlag("Dwarf Version"); + if (!Val) + return dwarf::DWARF_VERSION; + return cast<ConstantInt>(Val)->getZExtValue(); +} + DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), FirstCU(0), AbbreviationsSet(InitAbbreviationsSetSize), SourceIdMap(DIEValueAllocator), PrevLabel(NULL), GlobalCUIndexCount(0), - InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string", + InfoHolder(A, &AbbreviationsSet, Abbreviations, "info_string", DIEValueAllocator), SkeletonAbbrevSet(InitAbbreviationsSetSize), - SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string", + SkeletonHolder(A, &SkeletonAbbrevSet, SkeletonAbbrevs, "skel_string", DIEValueAllocator) { DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; @@ -180,37 +201,34 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - // Turn on accelerator tables and older gdb compatibility - // for Darwin. + // Turn on accelerator tables for Darwin by default, pubnames by + // default for non-Darwin, and handle split dwarf. bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin(); - if (DarwinGDBCompat == Default) { - if (IsDarwin) - IsDarwinGDBCompat = true; - else - IsDarwinGDBCompat = false; - } else - IsDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false; - if (DwarfAccelTables == Default) { - if (IsDarwin) - HasDwarfAccelTables = true; - else - HasDwarfAccelTables = false; - } else - HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false; + if (DwarfAccelTables == Default) + HasDwarfAccelTables = IsDarwin; + else + HasDwarfAccelTables = DwarfAccelTables == Enable; if (SplitDwarf == Default) HasSplitDwarf = false; else - HasSplitDwarf = SplitDwarf == Enable ? true : false; + HasSplitDwarf = SplitDwarf == Enable; + + if (DwarfPubSections == Default) + HasDwarfPubSections = !IsDarwin; + else + HasDwarfPubSections = DwarfPubSections == Enable; + + DwarfVersion = DwarfVersionNumber + ? 
DwarfVersionNumber + : getDwarfVersionFromModule(MMI->getModule()); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(); } } -DwarfDebug::~DwarfDebug() { -} // Switch to the specified MCSection and emit an assembler // temporary label to it if SymbolStem is specified. @@ -247,48 +265,37 @@ unsigned DwarfUnits::getStringPoolIndex(StringRef Str) { return Entry.second; } -unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) { - std::pair<MCSymbol*, unsigned> &Entry = AddressPool[Sym]; - if (Entry.first) return Entry.second; +unsigned DwarfUnits::getAddrPoolIndex(const MCSymbol *Sym) { + return getAddrPoolIndex(MCSymbolRefExpr::Create(Sym, Asm->OutContext)); +} - Entry.second = NextAddrPoolNumber++; - Entry.first = Sym; - return Entry.second; +unsigned DwarfUnits::getAddrPoolIndex(const MCExpr *Sym) { + std::pair<DenseMap<const MCExpr *, unsigned>::iterator, bool> P = + AddressPool.insert(std::make_pair(Sym, NextAddrPoolNumber)); + if (P.second) + ++NextAddrPoolNumber; + return P.first->second; } // Define a unique number for the abbreviation. // void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) { - // Profile the node so that we can make it unique. - FoldingSetNodeID ID; - Abbrev.Profile(ID); - // Check the set for priors. DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev); // If it's newly added. if (InSet == &Abbrev) { // Add to abbreviation list. - Abbreviations->push_back(&Abbrev); + Abbreviations.push_back(&Abbrev); // Assign the vector position + 1 as its number. - Abbrev.setNumber(Abbreviations->size()); + Abbrev.setNumber(Abbreviations.size()); } else { // Assign existing abbreviation number. Abbrev.setNumber(InSet->getNumber()); } } -// If special LLVM prefix that is used to inform the asm -// printer to not emit usual symbol prefix before the symbol name is used then -// return linkage name after skipping this special LLVM prefix. -static StringRef getRealLinkageName(StringRef LinkageName) { - char One = '\1'; - if (LinkageName.startswith(StringRef(&One, 1))) - return LinkageName.substr(1); - return LinkageName; -} - static bool isObjCClass(StringRef Name) { return Name.startswith("+") || Name.startswith("-"); } @@ -296,12 +303,7 @@ static bool isObjCClass(StringRef Name) { static bool hasObjCCategory(StringRef Name) { if (!isObjCClass(Name)) return false; - size_t pos = Name.find(')'); - if (pos != std::string::npos) { - if (Name[pos+1] != ' ') return false; - return true; - } - return false; + return Name.find(") ") != StringRef::npos; } static void getObjCClassCategory(StringRef In, StringRef &Class, @@ -321,11 +323,20 @@ static StringRef getObjCMethodName(StringRef In) { return In.slice(In.find(' ') + 1, In.find(']')); } +// Helper for sorting sections into a stable output order. +static bool SectionSort(const MCSection *A, const MCSection *B) { + std::string LA = (A ? A->getLabelBeginName() : ""); + std::string LB = (B ? B->getLabelBeginName() : ""); + return LA < LB; +} + // Add the various names to the Dwarf accelerator table names. +// TODO: Determine whether or not we should add names for programs +// that do not have a DW_AT_name or DW_AT_linkage_name field - this +// is only slightly different than the lookup of non-standard ObjC names. 
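
The DefaultOnOff pattern above gives each DWARF feature a per-platform default that the command line can still force on or off. A minimal standalone sketch of how such a tri-state resolves (plain C++; resolveDefaultOnOff and the IsDarwin flag are hypothetical stand-ins, not the LLVM API):

    #include <cassert>

    enum DefaultOnOff { Default, Enable, Disable };

    // Resolve a tri-state option against a platform default, mirroring how
    // HasDwarfAccelTables and HasDwarfPubSections are computed in this diff.
    static bool resolveDefaultOnOff(DefaultOnOff Opt, bool PlatformDefault) {
      if (Opt == Default)
        return PlatformDefault;
      return Opt == Enable;
    }

    int main() {
      bool IsDarwin = true; // hypothetical target check
      assert(resolveDefaultOnOff(Default, IsDarwin) == true);   // accel tables on Darwin
      assert(resolveDefaultOnOff(Disable, IsDarwin) == false);  // explicit override wins
      assert(resolveDefaultOnOff(Default, !IsDarwin) == false); // platform default otherwise
      return 0;
    }

This matches the constructor logic earlier in the diff, where accelerator tables default on only for Darwin and the pub sections default on only elsewhere.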
static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, DIE* Die) { if (!SP.isDefinition()) return; - TheCU->addAccelName(SP.getName(), Die); // If the linkage name is different than the name, go ahead and output @@ -346,30 +357,34 @@ static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, } } +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context nested inside a subprogram. +bool DwarfDebug::isSubprogramContext(const MDNode *Context) { + if (!Context) + return false; + DIDescriptor D(Context); + if (D.isSubprogram()) + return true; + if (D.isType()) + return isSubprogramContext(resolve(DIType(Context).getContext())); + return false; +} + // Find DIE for the given subprogram and attach appropriate DW_AT_low_pc // and DW_AT_high_pc attributes. If there are global variables in this // scope then create and insert DIEs for these variables. -DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, - const MDNode *SPNode) { - DIE *SPDie = SPCU->getDIE(SPNode); +DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { + DIE *SPDie = SPCU->getDIE(SP); assert(SPDie && "Unable to find subprogram DIE!"); - DISubprogram SP(SPNode); // If we're updating an abstract DIE, then we will be adding the children and // object pointer later on. But what we don't want to do is process the // concrete DIE twice. - DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode); - if (AbsSPDIE) { - bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie()); + if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { // Pick up abstract subprogram DIE. - SPDie = new DIE(dwarf::DW_TAG_subprogram); - // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of - // DW_FORM_ref4. - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, - AbsSPDIE); - SPCU->addDie(SPDie); + SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, AbsSPDIE); } else { DISubprogram SPDecl = SP.getFunctionDeclaration(); if (!SPDecl.isSubprogram()) { @@ -378,32 +393,31 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, // function then gdb prefers the definition at top level but does not // expect specification DIE in parent function. So avoid creating // specification DIE for a function defined inside a function. - if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext())) { + DIScope SPContext = resolve(SP.getContext()); + if (SP.isDefinition() && !SPContext.isCompileUnit() && + !SPContext.isFile() && + !isSubprogramContext(SPContext)) { SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments.
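
The branch above yields one of three DIE shapes for a concrete subprogram: a DW_AT_abstract_origin reference when an abstract DIE already exists (the inlined case), a declaration DIE plus a DW_AT_specification reference when the definition sits in a class or namespace context, or a single plain DIE otherwise. A condensed decision sketch (plain C++; classifySubprogram is a hypothetical illustration, not LLVM code):

    enum SPLinkKind { PlainDefinition, AbstractOrigin, Specification };

    // Mirrors the control flow of updateSubprogramScopeDIE above: an existing
    // abstract DIE wins; otherwise a definition in a non-CU, non-file,
    // non-subprogram context gets a declaration DIE plus DW_AT_specification.
    static SPLinkKind classifySubprogram(bool HasAbstractDIE, bool IsDefinition,
                                         bool ContextIsCUOrFile,
                                         bool ContextIsSubprogram) {
      if (HasAbstractDIE)
        return AbstractOrigin;
      if (IsDefinition && !ContextIsCUOrFile && !ContextIsSubprogram)
        return Specification;
      return PlainDefinition;
    }

    int main() {
      return classifySubprogram(true, true, false, false) == AbstractOrigin ? 0 : 1;
    }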
DICompositeType SPTy = SP.getType(); DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); + uint16_t SPTag = SPTy.getTag(); if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(Args.getElement(i)); + DIE *Arg = + SPCU->createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie); + DIType ATy(Args.getElement(i)); SPCU->addType(Arg, ATy); if (ATy.isArtificial()) SPCU->addFlag(Arg, dwarf::DW_AT_artificial); if (ATy.isObjectPointer()) - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, - dwarf::DW_FORM_ref4, Arg); - SPDie->addChild(Arg); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, Arg); } DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, - dwarf::DW_FORM_ref4, SPDeclDie); - SPCU->addDie(SPDie); + SPDie = + SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, SPDeclDie); } } } @@ -425,40 +439,64 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, return SPDie; } +/// Check whether we should create a DIE for the given Scope, return true +/// if we don't create a DIE (the corresponding DIE is null). +bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { + if (Scope->isAbstractScope()) + return false; + + // We don't create a DIE if there is no Range. + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); + if (Ranges.empty()) + return true; + + if (Ranges.size() > 1) + return false; + + // We don't create a DIE if we have a single Range and the end label + // is null. + SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); + MCSymbol *End = getLabelAfterInsn(RI->second); + return !End; +} + // Construct new DW_TAG_lexical_block for this scope and attach // DW_AT_low_pc/DW_AT_high_pc labels. DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { + if (isLexicalScopeDIENull(Scope)) + return 0; + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; - const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); - if (Ranges.empty()) - return 0; - - SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(); + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); + // If we have multiple ranges, emit them into the range section. if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + TheCU->addSectionOffset(ScopeDIE, dwarf::DW_AT_ranges, + DebugRangeSymbols.size() * + Asm->getDataLayout().getPointerSize()); + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); } + + // Terminate the range list. DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); return ScopeDIE; } + // Construct the address range for this DIE. 
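
The multi-range path above appends a begin/end label pair to DebugRangeSymbols for each instruction range, then a null pair as the list terminator, while DW_AT_ranges stores the byte offset of the scope's first pair (entries so far times the pointer size). A standalone sketch of that layout, with label names invented for illustration:

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      const unsigned PtrSize = 8; // stand-in for getDataLayout().getPointerSize()
      std::vector<std::string> DebugRangeSymbols; // flattened begin/end labels

      // DW_AT_ranges for this scope gets the byte offset of its first pair.
      unsigned RangesOffset = DebugRangeSymbols.size() * PtrSize;
      DebugRangeSymbols.push_back("Lbegin0");
      DebugRangeSymbols.push_back("Lend0");
      DebugRangeSymbols.push_back("Lbegin1");
      DebugRangeSymbols.push_back("Lend1");
      // A null pair terminates the scope's range list, as the code above does.
      DebugRangeSymbols.push_back("");
      DebugRangeSymbols.push_back("");
      std::printf("DW_AT_ranges offset: %u bytes\n", RangesOffset);
      return 0;
    }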
+ SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); MCSymbol *Start = getLabelBeforeInsn(RI->first); MCSymbol *End = getLabelAfterInsn(RI->second); - - if (End == 0) return 0; + assert(End && "End label should not be null!"); assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); @@ -473,7 +511,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, // represent this concrete inlined copy of the function. DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); assert(Ranges.empty() == false && "LexicalScope does not have instruction markers!"); @@ -487,30 +525,17 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return NULL; } - SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(); - MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); - MCSymbol *EndLabel = getLabelAfterInsn(RI->second); - - if (StartLabel == 0 || EndLabel == 0) { - llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); - } - assert(StartLabel->isDefined() && - "Invalid starting label for an inlined scope!"); - assert(EndLabel->isDefined() && - "Invalid end label for an inlined scope!"); - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); - TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, OriginDIE); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, OriginDIE); if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + TheCU->addSectionOffset(ScopeDIE, dwarf::DW_AT_ranges, + DebugRangeSymbols.size() * + Asm->getDataLayout().getPointerSize()); + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); @@ -518,31 +543,29 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); } else { + SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); + MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); + MCSymbol *EndLabel = getLabelAfterInsn(RI->second); + + if (StartLabel == 0 || EndLabel == 0) + llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); + + assert(StartLabel->isDefined() && + "Invalid starting label for an inlined scope!"); + assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!"); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel); TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel); } InlinedSubprogramDIEs.insert(OriginDIE); - // Track the start label for this inlined function. - //.debug_inlined section specification does not clearly state how - // to emit inlined scope that is split into multiple instruction ranges. - // For now, use first instruction range and emit low_pc/high_pc pair and - // corresponding .debug_inlined section entry for this pair. 
- DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator - I = InlineInfo.find(InlinedSP); - - if (I == InlineInfo.end()) { - InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE)); - InlinedSPNodes.push_back(InlinedSP); - } else - I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); - + // Add the call site information to the DIE. DILocation DL(Scope->getInlinedAt()); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, None, getOrCreateSourceID(DL.getFilename(), DL.getDirectory(), TheCU->getUniqueID())); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. @@ -551,42 +574,49 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return ScopeDIE; } -// Construct a DIE for this scope. -DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - if (!Scope || !Scope->getScopeNode()) - return NULL; - - DIScope DS(Scope->getScopeNode()); - // Early return to avoid creating dangling variable|scope DIEs. - if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() && - !TheCU->getDIE(DS)) - return NULL; - - SmallVector<DIE *, 8> Children; - DIE *ObjectPointer = NULL; +DIE *DwarfDebug::createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, + SmallVectorImpl<DIE*> &Children) { + DIE *ObjectPointer = NULL; // Collect arguments for current function. if (LScopes.isCurrentFunctionScope(Scope)) for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) if (DbgVariable *ArgDV = CurrentFnArguments[i]) if (DIE *Arg = - TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) { + TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) { Children.push_back(Arg); if (ArgDV->isObjectPointer()) ObjectPointer = Arg; } // Collect lexical scope children first. - const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope); + const SmallVectorImpl<DbgVariable *> &Variables =ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) if (DIE *Variable = - TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) { + TheCU->constructVariableDIE(*Variables[i], Scope->isAbstractScope())) { Children.push_back(Variable); if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; } - const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren(); + const SmallVectorImpl<LexicalScope *> &Scopes = Scope->getChildren(); for (unsigned j = 0, M = Scopes.size(); j < M; ++j) if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) Children.push_back(Nested); + return ObjectPointer; +} + +// Construct a DIE for this scope. +DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { + if (!Scope || !Scope->getScopeNode()) + return NULL; + + DIScope DS(Scope->getScopeNode()); + + SmallVector<DIE *, 8> Children; + DIE *ObjectPointer = NULL; + bool ChildrenCreated = false; + + // We try to create the scope DIE first, then the children DIEs. This will + // avoid creating un-used children then removing them later when we find out + // the scope DIE is null. 
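
The ordering described in that comment matters: for lexical blocks the code first proves the scope DIE will be non-null (or bails out), only then builds the children, and still emits nothing for an empty block with no imported entities. A reduced mirror of that flow (plain C++ with a toy DIE type, not the LLVM classes):

    #include <memory>
    #include <vector>

    struct DIE { std::vector<std::unique_ptr<DIE>> Children; };

    // Mirrors constructScopeDIE's lexical-block path: never build children
    // for a scope whose DIE is already known to be null.
    static std::unique_ptr<DIE>
    buildLexicalScope(bool ScopeWouldBeNull, bool HasImports, size_t NumVars) {
      if (ScopeWouldBeNull)
        return nullptr;                      // early exit: no orphan children
      std::vector<std::unique_ptr<DIE>> Kids;
      for (size_t i = 0; i < NumVars; ++i)   // children created only now
        Kids.push_back(std::make_unique<DIE>());
      if (Kids.empty() && !HasImports)
        return nullptr;                      // empty block: emit nothing
      auto Scope = std::make_unique<DIE>();
      Scope->Children = std::move(Kids);
      return Scope;
    }

    int main() { return buildLexicalScope(false, false, 2) ? 0 : 1; }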
DIE *ScopeDIE = NULL; if (Scope->getInlinedAt()) ScopeDIE = constructInlinedScopeDIE(TheCU, Scope); @@ -597,34 +627,49 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { // Note down abstract DIE. if (ScopeDIE) AbstractSPDies.insert(std::make_pair(DS, ScopeDIE)); - } - else - ScopeDIE = updateSubprogramScopeDIE(TheCU, DS); - } - else { + } else + ScopeDIE = updateSubprogramScopeDIE(TheCU, DISubprogram(DS)); + } else { + // Early exit when we know the scope DIE is going to be null. + if (isLexicalScopeDIENull(Scope)) + return NULL; + + // We create children here when we know the scope DIE is not going to be + // null and the children will be added to the scope DIE. + ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children); + ChildrenCreated = true; + // There is no need to emit empty lexical block DIE. std::pair<ImportedEntityMap::const_iterator, ImportedEntityMap::const_iterator> Range = std::equal_range( ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0), - CompareFirst()); + less_first()); if (Children.empty() && Range.first == Range.second) return NULL; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); - for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i) - constructImportedModuleDIE(TheCU, i->second, ScopeDIE); + assert(ScopeDIE && "Scope DIE should not be null."); + for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; + ++i) + constructImportedEntityDIE(TheCU, i->second, ScopeDIE); } - if (!ScopeDIE) return NULL; + if (!ScopeDIE) { + assert(Children.empty() && + "We create children only when the scope DIE is not null."); + return NULL; + } + if (!ChildrenCreated) + // We create children when the scope DIE is not null. + ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children); // Add children - for (SmallVector<DIE *, 8>::iterator I = Children.begin(), + for (SmallVectorImpl<DIE *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) ScopeDIE->addChild(*I); if (DS.isSubprogram() && ObjectPointer != NULL) - TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, - dwarf::DW_FORM_ref4, ObjectPointer); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, ObjectPointer); if (DS.isSubprogram()) TheCU->addPubTypes(DISubprogram(DS)); @@ -640,8 +685,10 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, StringRef DirName, unsigned CUID) { // If we use .loc in assembly, we can't separate .file entries according to // compile units. Thus all files will belong to the default compile unit. - if (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + + // FIXME: add a better feature test than hasRawTextSupport. Even better, + // extend .file to support this. + if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) CUID = 0; // If FE did not provide a file name, then assume stdin. @@ -676,14 +723,12 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, // Create new CompileUnit for the given metadata node with tag // DW_TAG_compile_unit. 
-CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { - DICompileUnit DIUnit(N); +CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { StringRef FN = DIUnit.getFilename(); CompilationDir = DIUnit.getDirectory(); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, - DIUnit.getLanguage(), Die, Asm, + CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, Die, DIUnit, Asm, this, &InfoHolder); FileIDCUMap[NewCU->getUniqueID()] = 0; @@ -710,31 +755,56 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { // Use a single line table if we are using .loc and generating assembly. bool UseTheFirstCU = - (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) || - (NewCU->getUniqueID() == 0); + (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) || + (NewCU->getUniqueID() == 0); - // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. For split dwarf this is - // left in the skeleton CU and so not included. - // The line table entries are not always emitted in assembly, so it - // is not okay to use line_table_start here. if (!useSplitDwarf()) { + // DW_AT_stmt_list is an offset of line number information for this + // compile unit in debug_line section. For split dwarf this is + // left in the skeleton CU and so not included. + // The line table entries are not always emitted in assembly, so it + // is not okay to use line_table_start here. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - UseTheFirstCU ? - Asm->GetTempSymbol("section_line") : LineTableStartSym); + NewCU->addSectionLabel( + Die, dwarf::DW_AT_stmt_list, + UseTheFirstCU ? Asm->GetTempSymbol("section_line") + : LineTableStartSym); else if (UseTheFirstCU) - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + NewCU->addSectionOffset(Die, dwarf::DW_AT_stmt_list, 0); else - NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - LineTableStartSym, DwarfLineSectionSym); + NewCU->addSectionDelta(Die, dwarf::DW_AT_stmt_list, + LineTableStartSym, DwarfLineSectionSym); + + // If we're using split dwarf the compilation dir is going to be in the + // skeleton CU and so we don't need to duplicate it here. + if (!CompilationDir.empty()) + NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + + // Flags to let the linker know we have emitted new style pubnames. Only + // emit it here if we don't have a skeleton CU for split dwarf. + if (GenerateGnuPubSections) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()), + DwarfGnuPubNamesSectionSym); + + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()), + DwarfGnuPubTypesSectionSym); + } } - // If we're using split dwarf the compilation dir is going to be in the - // skeleton CU and so we don't need to duplicate it here.
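
The stmt_list logic above encodes the same fact, where this CU's line table starts, in one of three forms: a section-relative label when the assembler can relocate across sections, a literal 0 when every CU shares the first line table, or a delta between two symbols otherwise. A compact mirror of the decision (plain C++; pickStmtListForm is a hypothetical name):

    #include <cstdio>

    enum StmtListForm { SectionLabel, LiteralZero, SymbolDelta };

    // Mirrors the branch that emits DW_AT_stmt_list above.
    static StmtListForm pickStmtListForm(bool RelocationsAcrossSections,
                                         bool UseTheFirstCU) {
      if (RelocationsAcrossSections)
        return SectionLabel;   // addSectionLabel(section_line or CU start)
      if (UseTheFirstCU)
        return LiteralZero;    // addSectionOffset(..., 0)
      return SymbolDelta;      // addSectionDelta(start, debug_line base)
    }

    int main() {
      std::printf("%d\n", pickStmtListForm(false, true)); // prints 1 (LiteralZero)
      return 0;
    }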
- if (!useSplitDwarf() && !CompilationDir.empty()) - NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized); @@ -751,13 +821,17 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { InfoHolder.addUnit(NewCU); - CUMap.insert(std::make_pair(N, NewCU)); + CUMap.insert(std::make_pair(DIUnit, NewCU)); + CUDieMap.insert(std::make_pair(Die, NewCU)); return NewCU; } // Construct subprogram DIE. -void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, - const MDNode *N) { +void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { + // FIXME: We should only call this routine once, however, during LTO if a + // program is defined in multiple CUs we could end up calling it out of + // beginModule as we walk the CUs. + CompileUnit *&CURef = SPMap[N]; if (CURef) return; @@ -771,49 +845,54 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP); - // Add to map. - TheCU->insertDIE(N, SubprogramDie); - - // Add to context owner. - TheCU->addToContextOwner(SubprogramDie, SP.getContext()); - - // Expose as global, if requested. - if (GenerateDwarfPubNamesSection) - TheCU->addGlobalName(SP.getName(), SubprogramDie); + // Expose as a global name. + TheCU->addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext())); } -void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, +void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N) { - DIImportedModule Module(N); + DIImportedEntity Module(N); if (!Module.Verify()) return; if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext())) - constructImportedModuleDIE(TheCU, Module, D); + constructImportedEntityDIE(TheCU, Module, D); } -void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N, +void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N, DIE *Context) { - DIImportedModule Module(N); + DIImportedEntity Module(N); if (!Module.Verify()) return; - return constructImportedModuleDIE(TheCU, Module, Context); + return constructImportedEntityDIE(TheCU, Module, Context); } -void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, - const DIImportedModule &Module, +void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, + const DIImportedEntity &Module, DIE *Context) { assert(Module.Verify() && "Use one of the MDNode * overloads to handle invalid metadata"); assert(Context && "Should always have a context for an imported_module"); - DIE *IMDie = new DIE(dwarf::DW_TAG_imported_module); + DIE *IMDie = new DIE(Module.getTag()); TheCU->insertDIE(Module, IMDie); - DIE *NSDie = TheCU->getOrCreateNameSpace(Module.getNameSpace()); + DIE *EntityDie; + DIDescriptor Entity = Module.getEntity(); + if (Entity.isNameSpace()) + EntityDie = TheCU->getOrCreateNameSpace(DINameSpace(Entity)); + else if (Entity.isSubprogram()) + EntityDie = TheCU->getOrCreateSubprogramDIE(DISubprogram(Entity)); + else if (Entity.isType()) + EntityDie = TheCU->getOrCreateTypeDIE(DIType(Entity)); + else + EntityDie = TheCU->getDIE(Entity); unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(), Module.getContext().getDirectory(), TheCU->getUniqueID()); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber()); - TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, NSDie); + TheCU->addUInt(IMDie, 
dwarf::DW_AT_decl_file, None, FileID); + TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, None, Module.getLineNumber()); + TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, EntityDie); + StringRef Name = Module.getName(); + if (!Name.empty()) + TheCU->addString(IMDie, dwarf::DW_AT_name, Name); Context->addChild(IMDie); } @@ -831,6 +910,7 @@ void DwarfDebug::beginModule() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; + TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes); // Emit initial sections so we can reference labels later. emitSectionLabels(); @@ -838,16 +918,16 @@ void DwarfDebug::beginModule() { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CUNode(CU_Nodes->getOperand(i)); CompileUnit *CU = constructCompileUnit(CUNode); - DIArray ImportedModules = CUNode.getImportedModules(); - for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i) + DIArray ImportedEntities = CUNode.getImportedEntities(); + for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) ScopesWithImportedEntities.push_back(std::make_pair( - DIImportedModule(ImportedModules.getElement(i)).getContext(), - ImportedModules.getElement(i))); + DIImportedEntity(ImportedEntities.getElement(i)).getContext(), + ImportedEntities.getElement(i))); std::sort(ScopesWithImportedEntities.begin(), - ScopesWithImportedEntities.end(), CompareFirst()); + ScopesWithImportedEntities.end(), less_first()); DIArray GVs = CUNode.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU->createGlobalVariableDIE(GVs.getElement(i)); + CU->createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); DIArray SPs = CUNode.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) constructSubprogramDIE(CU, SPs.getElement(i)); @@ -859,24 +939,15 @@ void DwarfDebug::beginModule() { CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); // Emit imported_modules last so that the relevant context is already // available. - for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i) - constructImportedModuleDIE(CU, ImportedModules.getElement(i)); - // If we're splitting the dwarf out now that we've got the entire - // CU then construct a skeleton CU based upon it. - if (useSplitDwarf()) { - // This should be a unique identifier when we want to build .dwp files. - CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, 0); - // Now construct the skeleton CU associated. - constructSkeletonCU(CUNode); - } + for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) + constructImportedEntityDIE(CU, ImportedEntities.getElement(i)); } // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); // Prime section data. - SectionMap.insert(Asm->getObjFileLowering().getTextSection()); + SectionMap[Asm->getObjFileLowering().getTextSection()]; } // Attach DW_AT_inline attribute with inlined subprogram DIEs. 
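
Note how constructImportedEntityDIE above now handles more than namespaces: the imported entity may be a namespace, a subprogram, or a type, and each kind resolves to (or lazily creates) the matching DIE before DW_AT_import is attached. A table-style mirror of that dispatch (plain C++ with a hypothetical enum, not the DIDescriptor API):

    #include <string>

    enum EntityKind { Namespace, Subprogram, Type, Other };

    // Mirrors the if/else chain that picks EntityDie above; the returned
    // string names the CompileUnit method the real code would call.
    static std::string resolveImportedEntity(EntityKind K) {
      switch (K) {
      case Namespace:  return "getOrCreateNameSpace";
      case Subprogram: return "getOrCreateSubprogramDIE";
      case Type:       return "getOrCreateTypeDIE";
      default:         return "getDIE"; // lookup of an already-built DIE
      }
    }

    int main() {
      return resolveImportedEntity(Type) == "getOrCreateTypeDIE" ? 0 : 1;
    }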
@@ -885,21 +956,20 @@ void DwarfDebug::computeInlinedDIEs() { for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { DIE *ISP = *AI; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); } for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(), AE = AbstractSPDies.end(); AI != AE; ++AI) { DIE *ISP = AI->second; if (InlinedSubprogramDIEs.count(ISP)) continue; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); } } // Collect info for variables that were optimized out. void DwarfDebug::collectDeadVariables() { const Module *M = MMI->getModule(); - DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap; if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { @@ -907,33 +977,70 @@ void DwarfDebug::collectDeadVariables() { DIArray Subprograms = TheCU.getSubprograms(); for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { DISubprogram SP(Subprograms.getElement(i)); - if (ProcessedSPNodes.count(SP) != 0) continue; - if (!SP.Verify()) continue; - if (!SP.isDefinition()) continue; + if (ProcessedSPNodes.count(SP) != 0) + continue; + if (!SP.isSubprogram()) + continue; + if (!SP.isDefinition()) + continue; DIArray Variables = SP.getVariables(); - if (Variables.getNumElements() == 0) continue; - - LexicalScope *Scope = - new LexicalScope(NULL, DIDescriptor(SP), NULL, false); - DeadFnScopeMap[SP] = Scope; + if (Variables.getNumElements() == 0) + continue; // Construct subprogram DIE and add variables DIEs. CompileUnit *SPCU = CUMap.lookup(TheCU); assert(SPCU && "Unable to find Compile Unit!"); + // FIXME: See the comment in constructSubprogramDIE about duplicate + // subprogram DIEs. constructSubprogramDIE(SPCU, SP); - DIE *ScopeDIE = SPCU->getDIE(SP); + DIE *SPDIE = SPCU->getDIE(SP); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { DIVariable DV(Variables.getElement(vi)); - if (!DV.Verify()) continue; - DbgVariable *NewVar = new DbgVariable(DV, NULL); + if (!DV.isVariable()) + continue; + DbgVariable NewVar(DV, NULL, this); if (DIE *VariableDIE = - SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope())) - ScopeDIE->addChild(VariableDIE); + SPCU->constructVariableDIE(NewVar, false)) + SPDIE->addChild(VariableDIE); } } } } - DeleteContainerSeconds(DeadFnScopeMap); +} + +// Type Signature [7.27] and ODR Hash code. + +/// \brief Grabs the string in whichever attribute is passed in and returns +/// a reference to it. Returns "" if the attribute doesn't exist. +static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) { + DIEValue *V = Die->findAttribute(Attr); + + if (DIEString *S = dyn_cast_or_null<DIEString>(V)) + return S->getString(); + + return StringRef(""); +} + +/// Return true if the current DIE is contained within an anonymous namespace. +static bool isContainedInAnonNamespace(DIE *Die) { + DIE *Parent = Die->getParent(); + + while (Parent) { + if (Parent->getTag() == dwarf::DW_TAG_namespace && + getDIEStringAttr(Parent, dwarf::DW_AT_name) == "") + return true; + Parent = Parent->getParent(); + } + + return false; +} + +/// Test if the current CU language is C++ and that we have +/// a named type that is not contained in an anonymous namespace. 
+static bool shouldAddODRHash(CompileUnit *CU, DIE *Die) { + return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus && + getDIEStringAttr(Die, dwarf::DW_AT_name) != "" && + !isContainedInAnonNamespace(Die); } void DwarfDebug::finalizeModuleInfo() { @@ -943,31 +1050,102 @@ void DwarfDebug::finalizeModuleInfo() { // Attach DW_AT_inline attribute with inlined subprogram DIEs. computeInlinedDIEs(); - // Emit DW_AT_containing_type attribute to connect types with their - // vtable holding type. + // Split out type units and conditionally add an ODR tag to the split + // out type. + // FIXME: Do type splitting. + for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) { + DIE *Die = TypeUnits[i]; + DIEHash Hash; + // If we've requested ODR hashes and it's applicable for an ODR hash then + // add the ODR signature now. + // FIXME: This should be added onto the type unit, not the type, but this + // works as an intermediate stage. + if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die)) + CUMap.begin()->second->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, + dwarf::DW_FORM_data8, + Hash.computeDIEODRSignature(*Die)); + } + + // Handle anything that needs to be done on a per-cu basis. for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(), - CUE = CUMap.end(); CUI != CUE; ++CUI) { + CUE = CUMap.end(); + CUI != CUE; ++CUI) { CompileUnit *TheCU = CUI->second; + // Emit DW_AT_containing_type attribute to connect types with their + // vtable holding type. TheCU->constructContainingTypeDIEs(); + + // If we're splitting the dwarf out now that we've got the entire + // CU then construct a skeleton CU based upon it. + if (useSplitDwarf()) { + uint64_t ID = 0; + if (GenerateCUHash) { + DIEHash CUHash; + ID = CUHash.computeCUSignature(*TheCU->getCUDie()); + } + // This should be a unique identifier when we want to build .dwp files. + TheCU->addUInt(TheCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + // Now construct the skeleton CU associated. + CompileUnit *SkCU = constructSkeletonCU(TheCU); + // This should be a unique identifier when we want to build .dwp files. + SkCU->addUInt(SkCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + } } - // Compute DIE offsets and sizes. + // Compute DIE offsets and sizes. InfoHolder.computeSizeAndOffsets(); if (useSplitDwarf()) SkeletonHolder.computeSizeAndOffsets(); } void DwarfDebug::endSections() { - // Standard sections final addresses. - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end")); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection()); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end")); + // Filter labels by section. + for (size_t n = 0; n < ArangeLabels.size(); n++) { + const SymbolCU &SCU = ArangeLabels[n]; + if (SCU.Sym->isInSection()) { + // Make a note of this symbol and it's section. + const MCSection *Section = &SCU.Sym->getSection(); + if (!Section->getKind().isMetadata()) + SectionMap[Section].push_back(SCU); + } else { + // Some symbols (e.g. common/bss on mach-o) can have no section but still + // appear in the output. This sucks as we rely on sections to build + // arange spans. We can do it without, but it's icky. + SectionMap[NULL].push_back(SCU); + } + } - // End text sections. 
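
Under split DWARF the same dwo_id must appear in both the full unit and its skeleton so a consumer can match the .dwo file against the executable's skeleton CU; with -generate-cu-hash the id is a hash over the CU DIE, otherwise it stays 0 for now. A sketch of that pairing invariant (plain C++; std::hash stands in for the DIEHash CU signature):

    #include <cassert>
    #include <cstdint>
    #include <functional>
    #include <string>

    struct Unit { uint64_t DwoId = 0; };

    int main() {
      bool GenerateCUHash = true;
      std::string CUDie = "compile_unit:foo.cpp"; // stand-in for the CU DIE
      uint64_t ID = GenerateCUHash ? std::hash<std::string>{}(CUDie) : 0;

      Unit FullCU, SkeletonCU;
      FullCU.DwoId = ID;     // DW_AT_GNU_dwo_id on the .dwo side
      SkeletonCU.DwoId = ID; // and the identical value on the skeleton
      assert(FullCU.DwoId == SkeletonCU.DwoId);
      return 0;
    }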
- for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) { - Asm->OutStreamer.SwitchSection(SectionMap[I]); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1)); + // Build a list of sections used. + std::vector<const MCSection *> Sections; + for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); + it++) { + const MCSection *Section = it->first; + Sections.push_back(Section); + } + + // Sort the sections into order. + // This is only done to ensure consistent output order across different runs. + std::sort(Sections.begin(), Sections.end(), SectionSort); + + // Add terminating symbols for each section. + for (unsigned ID=0;ID<Sections.size();ID++) { + const MCSection *Section = Sections[ID]; + MCSymbol *Sym = NULL; + + if (Section) { + // We can't call MCSection::getLabelEndName, as it's only safe to do so + // if we know the section name up-front. For user-created sections, the resulting + // label may not be valid to use as a label. (section names can use a greater + // set of characters on some systems) + Sym = Asm->GetTempSymbol("debug_end", ID); + Asm->OutStreamer.SwitchSection(Section); + Asm->OutStreamer.EmitLabel(Sym); + } + + // Insert a final terminator. + SectionMap[Section].push_back(SymbolCU(NULL, Sym)); } } @@ -984,6 +1162,8 @@ void DwarfDebug::endModule() { finalizeModuleInfo(); if (!useSplitDwarf()) { + emitDebugStr(); + // Emit all the DIEs into a debug info section. emitDebugInfo(); @@ -1002,15 +1182,12 @@ void DwarfDebug::endModule() { // Emit info into a debug macinfo section. emitDebugMacInfo(); - // Emit inline info. - // TODO: When we don't need the option anymore we - // can remove all of the code that this section - // depends upon. - if (useDarwinGDBCompat()) - emitDebugInlineInfo(); } else { // TODO: Fill this in for separated debug sections and separate // out information into new sections. + emitDebugStr(); + if (useSplitDwarf()) + emitDebugStrDWO(); // Emit the debug info section and compile units. emitDebugInfo(); @@ -1035,12 +1212,6 @@ void DwarfDebug::endModule() { // Emit DWO addresses. InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection()); - // Emit inline info. - // TODO: When we don't need the option anymore we - // can remove all of the code that this section - // depends upon. - if (useDarwinGDBCompat()) - emitDebugInlineInfo(); } // Emit info into the dwarf accelerator table sections. @@ -1051,20 +1222,11 @@ void DwarfDebug::endModule() { emitAccelTypes(); } - // Emit info into a debug pubnames section, if requested. - if (GenerateDwarfPubNamesSection) - emitDebugPubnames(); - - // Emit info into a debug pubtypes section. - // TODO: When we don't need the option anymore we can - // remove all of the code that adds to the table. - if (useDarwinGDBCompat()) - emitDebugPubTypes(); - - // Finally emit string information into a string table. - emitDebugStr(); - if (useSplitDwarf()) - emitDebugStrDWO(); + // Emit the pubnames and pubtypes sections if requested. + if (HasDwarfPubSections) { + emitDebugPubNames(GenerateGnuPubSections); + emitDebugPubTypes(GenerateGnuPubSections); + } // clean up. 
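
The new endSections above buckets arange symbols by section, sorts the sections by their begin-label name purely so the output is stable across runs, and then appends a null SymbolCU as a per-section terminator. A reduced mirror of that bookkeeping (plain C++ with strings standing in for MCSection and MCSymbol):

    #include <algorithm>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // Section name -> symbols seen in it (terminator appended last).
      std::map<std::string, std::vector<std::string>> SectionMap;
      SectionMap[".text"].push_back("func_begin1");
      SectionMap[".data"].push_back("gvar");

      std::vector<std::string> Sections;
      for (auto &KV : SectionMap)
        Sections.push_back(KV.first);
      // Stable output order across runs, as SectionSort does by label name.
      std::sort(Sections.begin(), Sections.end());
      for (auto &S : Sections)
        SectionMap[S].push_back(""); // empty string as the null terminator
      return 0;
    }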
SPMap.clear(); @@ -1072,7 +1234,7 @@ void DwarfDebug::endModule() { E = CUMap.end(); I != E; ++I) delete I->second; - for (SmallVector<CompileUnit *, 1>::iterator I = SkeletonCUs.begin(), + for (SmallVectorImpl<CompileUnit *>::iterator I = SkeletonCUs.begin(), E = SkeletonCUs.end(); I != E; ++I) delete *I; @@ -1094,7 +1256,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, if (!Scope) return NULL; - AbsDbgVariable = new DbgVariable(Var, NULL); + AbsDbgVariable = new DbgVariable(Var, NULL, this); addScopeVariable(Scope, AbsDbgVariable); AbstractVariables[Var] = AbsDbgVariable; return AbsDbgVariable; @@ -1143,7 +1305,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, continue; DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second); - DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable); + DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this); RegVar->setFrameIndex(VP.first); if (!addCurrentFnArgument(MF, RegVar, Scope)) addScopeVariable(Scope, RegVar); @@ -1158,7 +1320,8 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && - MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; + (MI->getOperand(1).isImm() || + (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U)); } // Get .debug_loc entry for the instruction range starting at MI. @@ -1168,16 +1331,12 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, const MachineInstr *MI) { const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); - if (MI->getNumOperands() != 3) { - MachineLocation MLoc = Asm->getDebugValueLocation(MI); - return DotDebugLocEntry(FLabel, SLabel, MLoc, Var); - } - if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) { + assert(MI->getNumOperands() == 3); + if (MI->getOperand(0).isReg()) { MachineLocation MLoc; - // TODO: Currently an offset of 0 in a DBG_VALUE means - // we need to generate a direct register value. - // There is no way to specify an indirect value with offset 0. - if (MI->getOperand(1).getImm() == 0) + // If the second operand is an immediate, this is a + // register-indirect address. + if (!MI->getOperand(1).isImm()) MLoc.set(MI->getOperand(0).getReg()); else MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); @@ -1198,7 +1357,7 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF, SmallPtrSet<const MDNode *, 16> &Processed) { - // collection info from MMI table. + // Grab the variable info that was squirreled away in the MMI side-table. 
collectVariableInfoFromMMITable(MF, Processed); for (SmallVectorImpl<const MDNode*>::const_iterator @@ -1231,7 +1390,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, Processed.insert(DV); assert(MInsn->isDebugValue() && "History must begin with debug value"); DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc()); - DbgVariable *RegVar = new DbgVariable(DV, AbsVar); + DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this); if (!addCurrentFnArgument(MF, RegVar, Scope)) addScopeVariable(Scope, RegVar); if (AbsVar) @@ -1291,10 +1450,10 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.Verify() || !Processed.insert(DV)) + if (!DV || !DV.isVariable() || !Processed.insert(DV)) continue; if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, NULL)); + addScopeVariable(Scope, new DbgVariable(DV, NULL, this)); } } @@ -1388,19 +1547,19 @@ void DwarfDebug::identifyScopeMarkers() { while (!WorkList.empty()) { LexicalScope *S = WorkList.pop_back_val(); - const SmallVector<LexicalScope *, 4> &Children = S->getChildren(); + const SmallVectorImpl<LexicalScope *> &Children = S->getChildren(); if (!Children.empty()) - for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(), + for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) WorkList.push_back(*SI); if (S->isAbstractScope()) continue; - const SmallVector<InsnRange, 4> &Ranges = S->getRanges(); + const SmallVectorImpl<InsnRange> &Ranges = S->getRanges(); if (Ranges.empty()) continue; - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { assert(RI->first && "InsnRange does not have first instruction!"); assert(RI->second && "InsnRange does not have second instruction!"); @@ -1422,7 +1581,7 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { const MDNode *Scope = getScopeNode(DL, Ctx); DISubprogram SP = getDISubprogram(Scope); - if (SP.Verify()) { + if (SP.isSubprogram()) { // Check for number of operands since the compatibility is // cheap here. if (SP->getNumOperands() > 19) @@ -1437,36 +1596,45 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { // Gather pre-function debug information. Assumes being called immediately // after the function entry point has been emitted. void DwarfDebug::beginFunction(const MachineFunction *MF) { - if (!MMI->hasDebugInfo()) return; + + // If there's no debug info for the function we're not going to do anything. + if (!MMI->hasDebugInfo()) + return; + + // Grab the lexical scopes for the function, if we don't have any of those + // then we're not going to be able to do anything. LScopes.initialize(*MF); - if (LScopes.empty()) return; + if (LScopes.empty()) + return; + + assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); + + // Make sure that each lexical scope will have a begin/end label. identifyScopeMarkers(); // Set DwarfCompileUnitID in MCContext to the Compile Unit this function - // belongs to. + // belongs to so that we add to the correct per-cu line table in the + // non-asm case. 
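
The rewritten getDebugLocEntry above relies on a three-operand DBG_VALUE convention: operand 0 carries the location, operand 2 the variable metadata, and the form of operand 1 distinguishes a direct register value (non-immediate) from a register-indirect address (immediate offset). A toy decoder under that convention (plain C++, hypothetical Operand type):

    #include <cstdio>

    struct Operand { bool IsImm; long long Imm; unsigned Reg; };

    // Toy decode of a 3-operand DBG_VALUE: (reg, offset-or-reg0, metadata).
    static void decodeDbgValue(const Operand &Loc, const Operand &Op1) {
      if (!Op1.IsImm)
        std::printf("direct value in reg %u\n", Loc.Reg);
      else
        std::printf("indirect: [reg %u + %lld]\n", Loc.Reg, Op1.Imm);
    }

    int main() {
      decodeDbgValue({false, 0, 5}, {false, 0, 0}); // value lives in reg 5
      decodeDbgValue({false, 0, 6}, {true, 16, 0}); // value at [reg6 + 16]
      return 0;
    }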
LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); - if (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) // Use a single line table if we are using .loc and generating assembly. Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); else Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); - FunctionBeginSym = Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber()); + // Emit a label for the function so that we have a beginning address. + FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionBeginSym); - assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); // LiveUserVar - Map physreg numbers to the MDNode they contain. - std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs()); + std::vector<const MDNode *> LiveUserVar(TRI->getNumRegs()); - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { bool AtBlockEntry = true; for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { @@ -1477,22 +1645,21 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Keep track of user variables. const MDNode *Var = - MI->getOperand(MI->getNumOperands() - 1).getMetadata(); + MI->getOperand(MI->getNumOperands() - 1).getMetadata(); // Variable is in a register, we need to check for clobbers. if (isDbgValueInDefinedReg(MI)) LiveUserVar[MI->getOperand(0).getReg()] = Var; // Check the history of this variable. - SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var]; + SmallVectorImpl<const MachineInstr *> &History = DbgValues[Var]; if (History.empty()) { UserVariables.push_back(Var); // The first mention of a function argument gets the FunctionBeginSym // label, so arguments are visible when breaking at function entry. DIVariable DV(Var); - if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable && - DISubprogram(getDISubprogram(DV.getContext())) - .describes(MF->getFunction())) + if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && + getDISubprogram(DV.getContext()).describes(MF->getFunction())) LabelsBeforeInsn[MI] = FunctionBeginSym; } else { // We have seen this variable before. Try to coalesce DBG_VALUEs. @@ -1502,8 +1669,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (History.size() >= 2 && Prev->isIdenticalTo(History[History.size() - 2])) { DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" - << "\t" << *Prev - << "\t" << *History[History.size() - 2] << "\n"); + << "\t" << *Prev << "\t" + << *History[History.size() - 2] << "\n"); History.pop_back(); } @@ -1514,17 +1681,15 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Previous register assignment needs to terminate at the end of // its basic block. MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); + PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) { // Drop DBG_VALUE for empty range. 
DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" - << "\t" << *Prev << "\n"); + << "\t" << *Prev << "\n"); History.pop_back(); - } - else { + } else if (llvm::next(PrevMBB) != PrevMBB->getParent()->end()) // Terminate after LastMI. History.push_back(LastMI); - } } } } @@ -1542,11 +1707,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Check if the instruction clobbers any registers with debug vars. for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) continue; - for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); - AI.isValid(); ++AI) { + for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); AI.isValid(); + ++AI) { unsigned Reg = *AI; const MDNode *Var = LiveUserVar[Reg]; if (!Var) @@ -1558,7 +1724,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { DbgValueHistoryMap::iterator HistI = DbgValues.find(Var); if (HistI == DbgValues.end()) continue; - SmallVectorImpl<const MachineInstr*> &History = HistI->second; + SmallVectorImpl<const MachineInstr *> &History = HistI->second; if (History.empty()) continue; const MachineInstr *Prev = History.back(); @@ -1580,7 +1746,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end(); I != E; ++I) { - SmallVectorImpl<const MachineInstr*> &History = I->second; + SmallVectorImpl<const MachineInstr *> &History = I->second; if (History.empty()) continue; @@ -1589,11 +1755,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { const MachineBasicBlock *PrevMBB = Prev->getParent(); MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); + PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) // Drop DBG_VALUE for empty range. History.pop_back(); - else { + else if (PrevMBB != &PrevMBB->getParent()->back()) { // Terminate after LastMI. History.push_back(LastMI); } @@ -1613,45 +1779,43 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Record beginning of function. if (!PrologEndLoc.isUnknown()) { - DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc, - MF->getFunction()->getContext()); - recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), - FnStartDL.getScope(MF->getFunction()->getContext()), - // We'd like to list the prologue as "not statements" but GDB behaves - // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - DWARF2_FLAG_IS_STMT); + DebugLoc FnStartDL = + getFnDebugLoc(PrologEndLoc, MF->getFunction()->getContext()); + recordSourceLine( + FnStartDL.getLine(), FnStartDL.getCol(), + FnStartDL.getScope(MF->getFunction()->getContext()), + // We'd like to list the prologue as "not statements" but GDB behaves + // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. + DWARF2_FLAG_IS_STMT); } } void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; DIVariable DV = Var->getVariable(); - if (DV.getTag() == dwarf::DW_TAG_arg_variable) { - DISubprogram Ctxt(DV.getContext()); - DIArray Variables = Ctxt.getVariables(); - // If the variable is a parameter (arg_variable) and this is an optimized - // build (the subprogram has a 'variables' list) make sure we keep the - // parameters in order. 
Otherwise we would produce an incorrect function - // type with parameters out of order if function parameters were used out of - // order or unused (see the call to addScopeVariable in endFunction where - // the remaining unused variables (including parameters) are added). - if (unsigned NumVariables = Variables.getNumElements()) { - // Keep the parameters at the start of the variables list. Search through - // current variable list (Vars) and the full function variable list in - // lock-step looking for this parameter in the full list to find the - // insertion point. - SmallVectorImpl<DbgVariable *>::iterator I = Vars.begin(); - unsigned j = 0; - while (I != Vars.end() && j != NumVariables && - Variables.getElement(j) != DV && - (*I)->getVariable().getTag() == dwarf::DW_TAG_arg_variable) { - if (Variables.getElement(j) == (*I)->getVariable()) - ++I; - ++j; - } - Vars.insert(I, Var); - return; + // Variables with positive arg numbers are parameters. + if (unsigned ArgNum = DV.getArgNumber()) { + // Keep all parameters in order at the start of the variable list to ensure + // function types are correct (no out-of-order parameters) + // + // This could be improved by only doing it for optimized builds (unoptimized + // builds have the right order to begin with), searching from the back (this + // would catch the unoptimized case quickly), or doing a binary search + // rather than linear search. + SmallVectorImpl<DbgVariable *>::iterator I = Vars.begin(); + while (I != Vars.end()) { + unsigned CurNum = (*I)->getVariable().getArgNumber(); + // A local (non-parameter) variable has been found, insert immediately + // before it. + if (CurNum == 0) + break; + // A later indexed parameter has been found, insert immediately before it. + if (CurNum > ArgNum) + break; + ++I; } + Vars.insert(I, Var); + return; } Vars.push_back(Var); @@ -1681,12 +1845,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { for (unsigned i = 0, e = AList.size(); i != e; ++i) { LexicalScope *AScope = AList[i]; DISubprogram SP(AScope->getScopeNode()); - if (SP.Verify()) { + if (SP.isSubprogram()) { // Collect info for variables that were optimized out. DIArray Variables = SP.getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.Verify() || !ProcessedVars.insert(DV)) + if (!DV || !DV.isVariable() || !ProcessedVars.insert(DV)) continue; // Check that DbgVariable for DV wasn't created earlier, when // findAbstractVariable() was called for inlined instance of DV. 
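
The rewritten addScopeVariable replaces the old lock-step walk over the subprogram's variable list with a simpler invariant: parameters stay at the front of the list, ordered by argument number, and locals trail behind in push order. The same insertion logic in self-contained form (hypothetical Var type):

    #include <vector>

    struct Var { unsigned ArgNum; /* 0 means a local, >0 a parameter */ };

    void addScopeVariable(std::vector<Var> &Vars, const Var &V) {
      if (V.ArgNum == 0) {        // locals are simply appended
        Vars.push_back(V);
        return;
      }
      std::vector<Var>::iterator I = Vars.begin();
      // Skip earlier-numbered parameters; stop at the first local or at
      // the first parameter with a higher argument number.
      while (I != Vars.end() && I->ArgNum != 0 && I->ArgNum <= V.ArgNum)
        ++I;
      Vars.insert(I, V);
    }

As the new comment in the hunk notes, a backwards or binary search would do the same job faster; the linear walk keeps the change minimal.
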
@@ -1695,7 +1859,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { if (AbstractVariables.lookup(CleanDV)) continue; if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, NULL)); + addScopeVariable(Scope, new DbgVariable(DV, NULL, this)); } } if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0) @@ -1707,11 +1871,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); - DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), - MMI->getFrameMoves())); - // Clear debug info - for (DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >::iterator + for (ScopeVariablesMap::iterator I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I) DeleteContainerPointers(I->second); ScopeVariables.clear(); @@ -1767,7 +1928,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, // Emit Methods //===----------------------------------------------------------------------===// -// Compute the size and offset of a DIE. +// Compute the size and offset of a DIE. The offset is relative to start of the +// CU. It returns the offset after laying out the DIE. unsigned DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Get the children. @@ -1778,7 +1940,7 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Get the abbreviation for this DIE. unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1); + const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1]; // Set DIE offset Die->setOffset(Offset); @@ -1810,21 +1972,25 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { return Offset; } -// Compute the size and offset of all the DIEs. +// Compute the size and offset for each DIE. void DwarfUnits::computeSizeAndOffsets() { - // Offset from the beginning of debug info section. - unsigned AccuOffset = 0; + // Offset from the first CU in the debug info section is 0 initially. + unsigned SecOffset = 0; + + // Iterate over each compile unit and set the size and offsets for each + // DIE within each compile unit. All offsets are CU relative. for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), E = CUs.end(); I != E; ++I) { - (*I)->setDebugInfoOffset(AccuOffset); - unsigned Offset = - sizeof(int32_t) + // Length of Compilation Unit Info - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t); // Pointer Size (in bytes) + (*I)->setDebugInfoOffset(SecOffset); + // CU-relative offset is reset to 0 here. + unsigned Offset = sizeof(int32_t) + // Length of Unit Info + (*I)->getHeaderSize(); // Unit-specific headers + + // EndOffset here is CU-relative, after laying out + // all of the CU DIE. 
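
The renaming from AccuOffset to SecOffset in the computeSizeAndOffsets hunk encodes the actual change: DIE offsets are now compile-unit-relative, restarting after each unit's header, while the section-level offset merely accumulates unit sizes. In outline, with a stand-in Unit type:

    #include <vector>

    struct Unit {
      unsigned HeaderSize;    // header bytes after the 4-byte length field
      unsigned DIETreeSize;   // stand-in for the recursive layout pass
      unsigned SectionOffset; // result: offset of the unit in .debug_info
    };

    void layoutUnits(std::vector<Unit> &CUs) {
      unsigned SecOffset = 0;
      for (size_t i = 0, e = CUs.size(); i != e; ++i) {
        CUs[i].SectionOffset = SecOffset;
        // CU-relative offsets restart here: length field, then header.
        unsigned Offset = 4 + CUs[i].HeaderSize;
        unsigned EndOffset = Offset + CUs[i].DIETreeSize;
        SecOffset += EndOffset; // the section offset keeps accumulating
      }
    }
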
unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset); - AccuOffset += EndOffset; + SecOffset += EndOffset; } } @@ -1849,9 +2015,16 @@ void DwarfDebug::emitSectionLabels() { DwarfLineSectionSym = emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); emitSectionSym(Asm, TLOF.getDwarfLocSection()); - if (GenerateDwarfPubNamesSection) + if (GenerateGnuPubSections) { + DwarfGnuPubNamesSectionSym = + emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection()); + DwarfGnuPubTypesSectionSym = + emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection()); + } else if (HasDwarfPubSections) { emitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); - emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); + emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); + } + DwarfStrSectionSym = emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); if (useSplitDwarf()) { @@ -1871,10 +2044,10 @@ void DwarfDebug::emitSectionLabels() { } // Recursively emits a debug information entry. -void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { +void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) { // Get the abbreviation for this DIE. unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbrevs->at(AbbrevNumber - 1); + const DIEAbbrev *Abbrev = Abbrevs[AbbrevNumber - 1]; // Emit the code (index) for the abbreviation. if (Asm->isVerbose()) @@ -1889,26 +2062,44 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { // Emit the DIE attribute values. for (unsigned i = 0, N = Values.size(); i < N; ++i) { - unsigned Attr = AbbrevData[i].getAttribute(); - unsigned Form = AbbrevData[i].getForm(); + dwarf::Attribute Attr = AbbrevData[i].getAttribute(); + dwarf::Form Form = AbbrevData[i].getForm(); assert(Form && "Too many attributes for DIE (check abbreviation)"); if (Asm->isVerbose()) Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); switch (Attr) { - case dwarf::DW_AT_abstract_origin: { + case dwarf::DW_AT_abstract_origin: + case dwarf::DW_AT_type: + case dwarf::DW_AT_friend: + case dwarf::DW_AT_specification: + case dwarf::DW_AT_import: + case dwarf::DW_AT_containing_type: { DIEEntry *E = cast<DIEEntry>(Values[i]); DIE *Origin = E->getEntry(); unsigned Addr = Origin->getOffset(); if (Form == dwarf::DW_FORM_ref_addr) { + assert(!useSplitDwarf() && "TODO: dwo files can't have relocations."); // For DW_FORM_ref_addr, output the offset from beginning of debug info // section. Origin->getOffset() returns the offset from start of the // compile unit. - DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - Addr += Holder.getCUOffset(Origin->getCompileUnit()); + CompileUnit *CU = CUDieMap.lookup(Origin->getCompileUnit()); + assert(CU && "CUDie should belong to a CU."); + Addr += CU->getDebugInfoOffset(); + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + Asm->EmitLabelPlusOffset(DwarfInfoSectionSym, Addr, + DIEEntry::getRefAddrSize(Asm)); + else + Asm->EmitLabelOffsetDifference(DwarfInfoSectionSym, Addr, + DwarfInfoSectionSym, + DIEEntry::getRefAddrSize(Asm)); + } else { + // Make sure Origin belong to the same CU. 
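
The branch being patched in emitDIE distinguishes the two DIE-reference forms: DW_FORM_ref4 stays CU-relative, while DW_FORM_ref_addr must be rebased to a .debug_info-section offset by adding the referenced DIE's compile-unit offset, now obtained through the new CUDieMap lookup (the linear-search getCUOffset helper is deleted further down). Numerically:

    #include <cstdint>

    // Value written for a DIE reference. CUSecOffset is the referenced
    // DIE's compile unit offset within .debug_info.
    uint32_t dieRefValue(bool CrossCU, uint32_t DieOffsetInCU,
                         uint32_t CUSecOffset) {
      if (CrossCU)
        return CUSecOffset + DieOffsetInCU; // DW_FORM_ref_addr, section-rel
      return DieOffsetInCU;                 // DW_FORM_ref4, CU-relative
    }
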
+ assert(Die->getCompileUnit() == Origin->getCompileUnit() && + "The referenced DIE should belong to the same CU in ref4"); + Asm->EmitInt32(Addr); } - Asm->EmitInt32(Addr); break; } case dwarf::DW_AT_ranges: { @@ -1930,7 +2121,7 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { case dwarf::DW_AT_location: { if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) { if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - Asm->EmitLabelReference(L->getValue(), 4); + Asm->EmitSectionOffset(L->getValue(), DwarfDebugLocSectionSym); else Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); } else { @@ -1984,20 +2175,10 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, TheCU->getUniqueID())); // Emit size of content not including length itself - unsigned ContentSize = Die->getSize() + - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t); // Pointer Size (in bytes) + Asm->OutStreamer.AddComment("Length of Unit"); + Asm->EmitInt32(TheCU->getHeaderSize() + Die->getSize()); - Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); - Asm->EmitInt32(ContentSize); - Asm->OutStreamer.AddComment("DWARF version number"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), - ASectionSym); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); + TheCU->emitHeader(ASection, ASectionSym); DD->emitDIE(Die, Abbreviations); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(), @@ -2005,19 +2186,6 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, } } -/// For a given compile unit DIE, returns offset from beginning of debug info. -unsigned DwarfUnits::getCUOffset(DIE *Die) { - assert(Die->getTag() == dwarf::DW_TAG_compile_unit && - "Input DIE should be compile unit in getCUOffset."); - for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) { - CompileUnit *TheCU = *I; - if (TheCU->getCUDie() == Die) - return TheCU->getDebugInfoOffset(); - } - llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits."); -} - // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; @@ -2091,7 +2259,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { // Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2099,7 +2267,7 @@ void DwarfDebug::emitAccelNames() { const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames(); for (StringMap<std::vector<DIE*> >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector<DIE *> &Entities = GI->second; for (std::vector<DIE *>::const_iterator DI = Entities.begin(), DE = Entities.end(); DI != DE; ++DI) @@ -2120,7 +2288,7 @@ void DwarfDebug::emitAccelNames() { // Emit objective C classes and categories into a hashed accelerator table // section. 
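
The emitUnits change above collapses the hand-written header fields into TheCU->getHeaderSize() and TheCU->emitHeader(...). For a DWARF32 compile unit that header is still the classic seven bytes after the length word: a 2-byte version, a 4-byte offset into .debug_abbrev, and a 1-byte address size. A little-endian sketch of those bytes (layout per the DWARF spec, not the LLVM API):

    #include <cstdint>
    #include <vector>

    void writeCUHeader(std::vector<uint8_t> &Out, uint16_t Version,
                       uint32_t AbbrevOffset, uint8_t AddrSize) {
      Out.push_back(uint8_t(Version));        // DWARF version number
      Out.push_back(uint8_t(Version >> 8));
      for (int i = 0; i != 4; ++i)            // offset into .debug_abbrev
        Out.push_back(uint8_t(AbbrevOffset >> (8 * i)));
      Out.push_back(AddrSize);                // pointer size in bytes
    }
    // getHeaderSize() for this layout is 2 + 4 + 1 = 7; the 4-byte length
    // field emitted just before it is excluded, matching "Length of Unit".
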
void DwarfDebug::emitAccelObjC() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2128,7 +2296,7 @@ void DwarfDebug::emitAccelObjC() { const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC(); for (StringMap<std::vector<DIE*> >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector<DIE *> &Entities = GI->second; for (std::vector<DIE *>::const_iterator DI = Entities.begin(), DE = Entities.end(); DI != DE; ++DI) @@ -2148,7 +2316,7 @@ void DwarfDebug::emitAccelObjC() { // Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2156,7 +2324,7 @@ void DwarfDebug::emitAccelNamespaces() { const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace(); for (StringMap<std::vector<DIE*> >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector<DIE *> &Entities = GI->second; for (std::vector<DIE *>::const_iterator DI = Entities.begin(), DE = Entities.end(); DI != DE; ++DI) @@ -2177,11 +2345,11 @@ void DwarfDebug::emitAccelNamespaces() { // Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { std::vector<DwarfAccelTable::Atom> Atoms; - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2)); - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)); DwarfAccelTable AT(Atoms); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), @@ -2191,7 +2359,7 @@ void DwarfDebug::emitAccelTypes() { = TheCU->getAccelTypes(); for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second; for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI) @@ -2209,23 +2377,85 @@ void DwarfDebug::emitAccelTypes() { AT.Emit(Asm, SectionBegin, &InfoHolder); } -/// emitDebugPubnames - Emit visible names into a debug pubnames section. +// Public name handling. +// The format for the various pubnames: +// +// dwarf pubnames - offset/name pairs where the offset is the offset into the CU +// for the DIE that is named. +// +// gnu pubnames - offset/index value/name tuples where the offset is the offset +// into the CU and the index value is computed according to the type of value +// for the DIE that is named. 
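
Each gnu-style entry carries a one-byte descriptor alongside the offset. Assuming the published gdb index layout (symbol kind in bits 4-6, static/external flag in bit 7, low bits reserved), which is what PubIndexEntryDescriptor::toBits() packs, a sketch with illustrative enum values:

    #include <cstdint>

    enum Kind    { GIEK_NONE = 0, GIEK_TYPE = 1, GIEK_VARIABLE = 2,
                   GIEK_FUNCTION = 3 };
    enum Linkage { GIEL_EXTERNAL = 0, GIEL_STATIC = 1 };

    // Pack kind and linkage into the descriptor byte: kind in bits 4-6,
    // linkage in bit 7. Treat the exact offsets as illustrative.
    uint8_t toBits(Kind K, Linkage L) {
      return uint8_t((unsigned(L) << 7) | (unsigned(K) << 4));
    }
    // e.g. an external function: toBits(GIEK_FUNCTION, GIEL_EXTERNAL)
    // yields 0x30.
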
+// +// For type units the offset is the offset of the skeleton DIE. For split dwarf +// it's the offset within the debug_info/debug_types dwo section, however, the +// reference in the pubname header doesn't change. + +/// computeIndexValue - Compute the gdb index value for the DIE and CU. +static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU, + DIE *Die) { + dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC; + + // We could have a specification DIE that has most of our knowledge; + // look for that now. + DIEValue *SpecVal = Die->findAttribute(dwarf::DW_AT_specification); + if (SpecVal) { + DIE *SpecDIE = cast<DIEEntry>(SpecVal)->getEntry(); + if (SpecDIE->findAttribute(dwarf::DW_AT_external)) + Linkage = dwarf::GIEL_EXTERNAL; + } else if (Die->findAttribute(dwarf::DW_AT_external)) + Linkage = dwarf::GIEL_EXTERNAL; + + switch (Die->getTag()) { + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + return dwarf::PubIndexEntryDescriptor( + dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus + ? dwarf::GIEL_STATIC + : dwarf::GIEL_EXTERNAL); + case dwarf::DW_TAG_typedef: + case dwarf::DW_TAG_base_type: + case dwarf::DW_TAG_subrange_type: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC); + case dwarf::DW_TAG_namespace: + return dwarf::GIEK_TYPE; + case dwarf::DW_TAG_subprogram: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage); + case dwarf::DW_TAG_constant: + case dwarf::DW_TAG_variable: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage); + case dwarf::DW_TAG_enumerator: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, + dwarf::GIEL_STATIC); + default: + return dwarf::GIEK_NONE; + } +} + +/// emitDebugPubNames - Emit visible names into a debug pubnames section. /// -void DwarfDebug::emitDebugPubnames() { +void DwarfDebug::emitDebugPubNames(bool GnuStyle) { const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + const MCSection *PSec = + GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() : Asm->getObjFileLowering().getDwarfPubNamesSection(); typedef DenseMap<const MDNode*, CompileUnit*> CUMapType; for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; unsigned ID = TheCU->getUniqueID(); - if (TheCU->getGlobalNames().empty()) - continue; - // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubNamesSection()); + Asm->OutStreamer.SwitchSection(PSec); + + // Emit a label so we can reference the beginning of this pubname section. + if (GnuStyle) + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubnames", + TheCU->getUniqueID())); + // Emit the header.
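
After the header that follows, the set body is just offset/name records, each name NUL-terminated, with the gnu-style descriptor byte wedged between offset and name when GnuStyle is set, and a zero offset as the end mark. A byte-level sketch (little-endian, stand-in types):

    #include <cstdint>
    #include <string>
    #include <vector>

    struct PubEntry { uint32_t DieOffset; uint8_t Desc; std::string Name; };

    void emitPubBody(std::vector<uint8_t> &Out,
                     const std::vector<PubEntry> &Entries, bool GnuStyle) {
      for (size_t i = 0, e = Entries.size(); i != e; ++i) {
        for (int b = 0; b != 4; ++b)            // CU-relative DIE offset
          Out.push_back(uint8_t(Entries[i].DieOffset >> (8 * b)));
        if (GnuStyle)
          Out.push_back(Entries[i].Desc);       // kind/linkage byte
        const std::string &N = Entries[i].Name; // name plus its NUL
        Out.insert(Out.end(), N.c_str(), N.c_str() + N.size() + 1);
      }
      for (int b = 0; b != 4; ++b)              // end mark
        Out.push_back(0);
    }
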
Asm->OutStreamer.AddComment("Length of Public Names Info"); Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID), Asm->GetTempSymbol("pubnames_begin", ID), 4); @@ -2233,7 +2463,7 @@ void DwarfDebug::emitDebugPubnames() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID)); Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), @@ -2244,18 +2474,27 @@ void DwarfDebug::emitDebugPubnames() { Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), 4); + // Emit the pubnames for this compilation unit. const StringMap<DIE*> &Globals = TheCU->getGlobalNames(); for (StringMap<DIE*>::const_iterator GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); - const DIE *Entity = GI->second; + DIE *Entity = GI->second; Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); + if (GnuStyle) { + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); + Asm->OutStreamer.AddComment( + Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Asm->EmitInt8(Desc.toBits()); + } + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); } Asm->OutStreamer.AddComment("End Mark"); @@ -2264,55 +2503,78 @@ void DwarfDebug::emitDebugPubnames() { } } -void DwarfDebug::emitDebugPubTypes() { +void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { + const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + const MCSection *PSec = + GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() + : Asm->getObjFileLowering().getDwarfPubTypesSection(); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { + E = CUMap.end(); + I != E; ++I) { CompileUnit *TheCU = I->second; // Start the dwarf pubtypes section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubTypesSection()); + Asm->OutStreamer.SwitchSection(PSec); + + // Emit a label so we can reference the beginning of this pubtype section. + if (GnuStyle) + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubtypes", + TheCU->getUniqueID())); + + // Emit the header. 
Asm->OutStreamer.AddComment("Length of Public Types Info"); Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()), - Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4); + Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()), + Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin", - TheCU->getUniqueID())); + Asm->OutStreamer.EmitLabel( + Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID())); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DW_PUBTYPES_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), - TheCU->getUniqueID()), - DwarfInfoSectionSym); + Asm->EmitSectionOffset( + Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), + DwarfInfoSectionSym); Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), - TheCU->getUniqueID()), - Asm->GetTempSymbol(ISec->getLabelBeginName(), - TheCU->getUniqueID()), - 4); - - const StringMap<DIE*> &Globals = TheCU->getGlobalTypes(); - for (StringMap<DIE*>::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + Asm->EmitLabelDifference( + Asm->GetTempSymbol(ISec->getLabelEndName(), TheCU->getUniqueID()), + Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), 4); + + // Emit the pubtypes. + const StringMap<DIE *> &Globals = TheCU->getGlobalTypes(); + for (StringMap<DIE *>::const_iterator GI = Globals.begin(), + GE = Globals.end(); + GI != GE; ++GI) { const char *Name = GI->getKeyData(); DIE *Entity = GI->second; - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); + if (GnuStyle) { + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); + Asm->OutStreamer.AddComment( + Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Asm->EmitInt8(Desc.toBits()); + } + + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("External Name"); + // Emit the name with a terminating null byte. - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength() + 1)); } Asm->OutStreamer.AddComment("End Mark"); Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end", - TheCU->getUniqueID())); + Asm->OutStreamer.EmitLabel( + Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID())); } } @@ -2367,24 +2629,18 @@ void DwarfUnits::emitAddresses(const MCSection *AddrSection) { // Start the dwarf addr section. Asm->OutStreamer.SwitchSection(AddrSection); - // Get all of the string pool entries and put them in an array by their ID so - // we can sort them. 
- SmallVector<std::pair<unsigned, - std::pair<MCSymbol*, unsigned>* >, 64> Entries; + // Order the address pool entries by ID + SmallVector<const MCExpr *, 64> Entries(AddressPool.size()); - for (DenseMap<MCSymbol*, std::pair<MCSymbol*, unsigned> >::iterator - I = AddressPool.begin(), E = AddressPool.end(); + for (DenseMap<const MCExpr *, unsigned>::iterator I = AddressPool.begin(), + E = AddressPool.end(); I != E; ++I) - Entries.push_back(std::make_pair(I->second.second, &(I->second))); - - array_pod_sort(Entries.begin(), Entries.end()); + Entries[I->second] = I->first; for (unsigned i = 0, e = Entries.size(); i != e; ++i) { - // Emit a label for reference from debug information entries. - MCSymbol *Sym = Entries[i].second->first; - if (Sym) - Asm->EmitLabelReference(Entries[i].second->first, - Asm->getDataLayout().getPointerSize()); + // Emit an expression for reference from debug information entries. + if (const MCExpr *Expr = Entries[i]) + Asm->OutStreamer.EmitValue(Expr, Asm->getDataLayout().getPointerSize()); else Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize()); } @@ -2397,7 +2653,7 @@ void DwarfDebug::emitDebugStr() { Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } -// Emit visible names into a debug loc section. +// Emit locations into the debug loc section. void DwarfDebug::emitDebugLoc() { if (DotDebugLocEntries.empty()) return; @@ -2426,9 +2682,9 @@ void DwarfDebug::emitDebugLoc() { Asm->OutStreamer.EmitIntValue(0, Size); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); } else { - Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size); - Asm->OutStreamer.EmitSymbolValue(Entry.End, Size); - DIVariable DV(Entry.Variable); + Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size); + Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size); + DIVariable DV(Entry.getVariable()); Asm->OutStreamer.AddComment("Loc expr size"); MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); @@ -2448,17 +2704,18 @@ void DwarfDebug::emitDebugLoc() { Asm->EmitULEB128(Entry.getInt()); } } else if (Entry.isLocation()) { + MachineLocation Loc = Entry.getLoc(); if (!DV.hasComplexAddress()) // Regular entry. - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); else { // Complex address entry. unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { - if (Entry.Loc.getOffset()) { + if (Loc.getOffset()) { i = 2; - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); Asm->OutStreamer.AddComment("DW_OP_deref"); Asm->EmitInt8(dwarf::DW_OP_deref); Asm->OutStreamer.AddComment("DW_OP_plus_uconst"); @@ -2467,12 +2724,12 @@ void DwarfDebug::emitDebugLoc() { } else { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); - Asm->EmitDwarfRegOp(Loc); + MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitDwarfRegOp(TLoc, DV.isIndirect()); i = 2; } } else { - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); } // Emit remaining complex address elements. 
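
The complex-address loop above translates DIBuilder opcodes into DWARF expression bytes: OpPlus becomes DW_OP_plus_uconst followed by a ULEB128 operand, and OpDeref becomes DW_OP_deref, emitted only when the location is not already a register (mirroring the !Loc.isReg() check). A standalone sketch with illustrative tag values:

    #include <cstdint>
    #include <vector>

    enum Elem { OpPlus = 1, OpDeref = 2 };      // illustrative tag values
    enum DwOp { DW_OP_deref = 0x06, DW_OP_plus_uconst = 0x23 };

    void lowerElements(const std::vector<uint64_t> &Elems, bool LocIsReg,
                       std::vector<uint8_t> &Expr) {
      for (size_t i = 0; i < Elems.size(); ++i) {
        if (Elems[i] == OpPlus) {
          Expr.push_back(DW_OP_plus_uconst);
          uint64_t V = Elems[++i];              // operand follows the tag
          do {                                  // ULEB128-encode it
            uint8_t Byte = V & 0x7f;
            V >>= 7;
            Expr.push_back(Byte | (V ? 0x80 : 0));
          } while (V);
        } else if (Elems[i] == OpDeref) {
          if (!LocIsReg)                        // mirrors !Loc.isReg()
            Expr.push_back(DW_OP_deref);
        }
      }
    }
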
@@ -2482,7 +2739,7 @@ void DwarfDebug::emitDebugLoc() { Asm->EmitInt8(dwarf::DW_OP_plus_uconst); Asm->EmitULEB128(DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { - if (!Entry.Loc.isReg()) + if (!Loc.isReg()) Asm->EmitInt8(dwarf::DW_OP_deref); } else llvm_unreachable("unknown Opcode found in complex address"); @@ -2496,18 +2753,178 @@ void DwarfDebug::emitDebugLoc() { } } -// Emit visible names into a debug aranges section. +struct SymbolCUSorter { + SymbolCUSorter(const MCStreamer &s) : Streamer(s) {} + const MCStreamer &Streamer; + + bool operator() (const SymbolCU &A, const SymbolCU &B) { + unsigned IA = A.Sym ? Streamer.GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Streamer.GetSymbolOrder(B.Sym) : 0; + + // Symbols with no order assigned should be placed at the end. + // (e.g. section end labels) + if (IA == 0) + IA = (unsigned)(-1); + if (IB == 0) + IB = (unsigned)(-1); + return IA < IB; + } +}; + +static bool CUSort(const CompileUnit *A, const CompileUnit *B) { + return (A->getUniqueID() < B->getUniqueID()); +} + +struct ArangeSpan { + const MCSymbol *Start, *End; +}; + +// Emit a debug aranges section, containing a CU lookup for any +// address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { // Start the dwarf aranges section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfARangesSection()); + Asm->OutStreamer + .SwitchSection(Asm->getObjFileLowering().getDwarfARangesSection()); + + typedef DenseMap<CompileUnit *, std::vector<ArangeSpan> > SpansType; + + SpansType Spans; + + // Build a list of sections used. + std::vector<const MCSection *> Sections; + for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); + it++) { + const MCSection *Section = it->first; + Sections.push_back(Section); + } + + // Sort the sections into order. + // This is only done to ensure consistent output order across different runs. + std::sort(Sections.begin(), Sections.end(), SectionSort); + + // Build a set of address spans, sorted by CU. + for (size_t SecIdx=0;SecIdx<Sections.size();SecIdx++) { + const MCSection *Section = Sections[SecIdx]; + SmallVector<SymbolCU, 8> &List = SectionMap[Section]; + if (List.size() < 2) + continue; + + // Sort the symbols by offset within the section. + SymbolCUSorter sorter(Asm->OutStreamer); + std::sort(List.begin(), List.end(), sorter); + + // If we have no section (e.g. common), just write out + // individual spans for each symbol. + if (Section == NULL) { + for (size_t n = 0; n < List.size(); n++) { + const SymbolCU &Cur = List[n]; + + ArangeSpan Span; + Span.Start = Cur.Sym; + Span.End = NULL; + if (Cur.CU) + Spans[Cur.CU].push_back(Span); + } + } else { + // Build spans between each label. + const MCSymbol *StartSym = List[0].Sym; + for (size_t n = 1; n < List.size(); n++) { + const SymbolCU &Prev = List[n - 1]; + const SymbolCU &Cur = List[n]; + + // Try and build the longest span we can within the same CU. + if (Cur.CU != Prev.CU) { + ArangeSpan Span; + Span.Start = StartSym; + Span.End = Cur.Sym; + Spans[Prev.CU].push_back(Span); + StartSym = Cur.Sym; + } + } + } + } + + const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + unsigned PtrSize = Asm->getDataLayout().getPointerSize(); + + // Build a list of CUs used. + std::vector<CompileUnit *> CUs; + for (SpansType::iterator it = Spans.begin(); it != Spans.end(); it++) { + CompileUnit *CU = it->first; + CUs.push_back(CU); + } + + // Sort the CU list (again, to ensure consistent output order). 
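
The span builder in the hunk above only cuts a span when ownership changes: symbols are sorted by offset within their section, and the longest run belonging to one CU becomes a single arange span. Self-contained sketch (in the real code the final open run is closed by the section-end label, which the sorter deliberately places last):

    #include <string>
    #include <vector>

    struct Sym  { std::string Label; int CU; }; // symbol + owning CU id
    struct Span { std::string Start, End; int CU; };

    std::vector<Span> buildSpans(const std::vector<Sym> &List) {
      std::vector<Span> Spans;
      if (List.size() < 2)
        return Spans;
      std::string StartSym = List[0].Label;
      for (size_t n = 1; n < List.size(); ++n) {
        // Extend the current run while the CU is unchanged; cut a span
        // at each ownership boundary.
        if (List[n].CU != List[n - 1].CU) {
          Span S;
          S.Start = StartSym;
          S.End = List[n].Label;
          S.CU = List[n - 1].CU;
          Spans.push_back(S);
          StartSym = List[n].Label;
        }
      }
      return Spans;
    }
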
+ std::sort(CUs.begin(), CUs.end(), CUSort); + + // Emit an arange table for each CU we used. + for (size_t CUIdx=0;CUIdx<CUs.size();CUIdx++) { + CompileUnit *CU = CUs[CUIdx]; + std::vector<ArangeSpan> &List = Spans[CU]; + + // Emit size of content not including length itself. + unsigned ContentSize + = sizeof(int16_t) // DWARF ARange version number + + sizeof(int32_t) // Offset of CU in the .debug_info section + + sizeof(int8_t) // Pointer Size (in bytes) + + sizeof(int8_t); // Segment Size (in bytes) + + unsigned TupleSize = PtrSize * 2; + + // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. + unsigned Padding = 0; + while (((sizeof(int32_t) + ContentSize + Padding) % TupleSize) != 0) + Padding++; + + ContentSize += Padding; + ContentSize += (List.size() + 1) * TupleSize; + + // For each compile unit, write the list of spans it covers. + Asm->OutStreamer.AddComment("Length of ARange Set"); + Asm->EmitInt32(ContentSize); + Asm->OutStreamer.AddComment("DWARF Arange version number"); + Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); + Asm->OutStreamer.AddComment("Offset Into Debug Info Section"); + Asm->EmitSectionOffset( + Asm->GetTempSymbol(ISec->getLabelBeginName(), CU->getUniqueID()), + DwarfInfoSectionSym); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->EmitInt8(PtrSize); + Asm->OutStreamer.AddComment("Segment Size (in bytes)"); + Asm->EmitInt8(0); + + for (unsigned n = 0; n < Padding; n++) + Asm->EmitInt8(0xff); + + for (unsigned n = 0; n < List.size(); n++) { + const ArangeSpan &Span = List[n]; + Asm->EmitLabelReference(Span.Start, PtrSize); + + // Calculate the size as being from the span start to it's end. + if (Span.End) { + Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize); + } else { + // For symbols without an end marker (e.g. common), we + // write a single arange entry containing just that one symbol. + uint64_t Size = SymSize[Span.Start]; + if (Size == 0) + Size = 1; + + Asm->OutStreamer.EmitIntValue(Size, PtrSize); + } + } + + Asm->OutStreamer.AddComment("ARange terminator"); + Asm->OutStreamer.EmitIntValue(0, PtrSize); + Asm->OutStreamer.EmitIntValue(0, PtrSize); + } } // Emit visible names into a debug ranges section. void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfRangesSection()); + Asm->OutStreamer + .SwitchSection(Asm->getObjFileLowering().getDwarfRangesSection()); unsigned char Size = Asm->getDataLayout().getPointerSize(); for (SmallVectorImpl<const MCSymbol *>::iterator I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); @@ -2528,113 +2945,27 @@ void DwarfDebug::emitDebugMacInfo() { } } -// Emit inline info using following format. -// Section Header: -// 1. length of section -// 2. Dwarf version number -// 3. address size. -// -// Entries (one "entry" for each function that was inlined): -// -// 1. offset into __debug_str section for MIPS linkage name, if exists; -// otherwise offset into __debug_str for regular function name. -// 2. offset into __debug_str section for regular function name. -// 3. an unsigned LEB128 number indicating the number of distinct inlining -// instances for the function. -// -// The rest of the entry consists of a {die_offset, low_pc} pair for each -// inlined instance; the die_offset points to the inlined_subroutine die in the -// __debug_info section, and the low_pc is the starting address for the -// inlining instance. 
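
The padding loop in the arange emitter above has a closed form worth noting: the address/length tuples must start on a multiple of the tuple size, counting the 4-byte length word that precedes ContentSize.

    // Padding needed before the first arange tuple (DWARF 7.20 alignment).
    unsigned arangePadding(unsigned ContentSize, unsigned PtrSize) {
      unsigned TupleSize = PtrSize * 2;
      unsigned HeaderBytes = 4 /* length field */ + ContentSize;
      unsigned Rem = HeaderBytes % TupleSize;
      return Rem ? TupleSize - Rem : 0;
    }
    // e.g. a 64-bit target: ContentSize = 2 + 4 + 1 + 1 = 8 header bytes,
    // HeaderBytes = 12, TupleSize = 16, so 4 bytes of 0xff padding.
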
-void DwarfDebug::emitDebugInlineInfo() { - if (!Asm->MAI->doesDwarfUseInlineInfoSection()) - return; - - if (!FirstCU) - return; - - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfDebugInlineSection()); - - Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1), - Asm->GetTempSymbol("debug_inlined_begin", 1), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1)); - - Asm->OutStreamer.AddComment("Dwarf Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); - - for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(), - E = InlinedSPNodes.end(); I != E; ++I) { - - const MDNode *Node = *I; - DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II - = InlineInfo.find(Node); - SmallVectorImpl<InlineInfoLabels> &Labels = II->second; - DISubprogram SP(Node); - StringRef LName = SP.getLinkageName(); - StringRef Name = SP.getName(); - - Asm->OutStreamer.AddComment("MIPS linkage name"); - if (LName.empty()) - Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name), - DwarfStrSectionSym); - else - Asm->EmitSectionOffset(InfoHolder - .getStringPoolEntry(getRealLinkageName(LName)), - DwarfStrSectionSym); - - Asm->OutStreamer.AddComment("Function name"); - Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name), - DwarfStrSectionSym); - Asm->EmitULEB128(Labels.size(), "Inline count"); - - for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(), - LE = Labels.end(); LI != LE; ++LI) { - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(LI->second->getOffset()); - - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc"); - Asm->OutStreamer.EmitSymbolValue(LI->first, - Asm->getDataLayout().getPointerSize()); - } - } - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1)); -} - // DWARF5 Experimental Separate Dwarf emitters. // This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, // DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, -// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present, -// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa. -CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { - DICompileUnit DIUnit(N); - CompilationDir = DIUnit.getDirectory(); +// DW_AT_ranges_base, DW_AT_addr_base. +CompileUnit *DwarfDebug::constructSkeletonCU(const CompileUnit *CU) { DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, - DIUnit.getLanguage(), Die, Asm, - this, &SkeletonHolder); + CompileUnit *NewCU = new CompileUnit(CU->getUniqueID(), Die, CU->getNode(), + Asm, this, &SkeletonHolder); NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, - DIUnit.getSplitDebugFilename()); - - // This should be a unique identifier when we want to build .dwp files. - NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); + CU->getNode().getSplitDebugFilename()); // Relocate to the beginning of the addr_base section, else 0 for the // beginning of the one for this compile unit. 
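
constructSkeletonCU below leans on a pattern repeated throughout this patch (addr_base here, then stmt_list, pubnames/pubtypes, ranges_base): if the target's assembler supports relocations across sections, reference the section symbol and let the linker resolve it; otherwise emit a literal offset, which is 0 because the skeleton's tables sit at the start of their sections. Schematically, with made-up types:

    #include <string>

    struct SectionRef {
      bool UseLabel;      // relocation available: reference the symbol
      std::string Label;
      unsigned Offset;    // otherwise: a hard-coded section offset
    };

    SectionRef makeSectionRef(bool RelocsAcrossSections,
                              const std::string &SectionSym) {
      SectionRef R;
      R.UseLabel = RelocsAcrossSections;
      R.Label = RelocsAcrossSections ? SectionSym : std::string();
      R.Offset = 0;       // tables start at the beginning of the section
      return R;
    }
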
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, - DwarfAddrSectionSym); + NewCU->addSectionLabel(Die, dwarf::DW_AT_GNU_addr_base, + DwarfAddrSectionSym); else - NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, - dwarf::DW_FORM_sec_offset, 0); + NewCU->addSectionOffset(Die, dwarf::DW_AT_GNU_addr_base, 0); // 2.17.1 requires that we use DW_AT_low_pc for a single entry point // into an entity. We're using 0, or a NULL label for this. @@ -2644,14 +2975,47 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { // compile unit in debug_line section. // FIXME: Should handle multiple compile units. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, - DwarfLineSectionSym); + NewCU->addSectionLabel(Die, dwarf::DW_AT_stmt_list, + DwarfLineSectionSym); else - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0); + NewCU->addSectionOffset(Die, dwarf::DW_AT_stmt_list, 0); if (!CompilationDir.empty()) NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + // Flags to let the linker know we have emitted new style pubnames. + if (GenerateGnuPubSections) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()), + DwarfGnuPubNamesSectionSym); + + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()), + DwarfGnuPubTypesSectionSym); + } + + // Flag if we've emitted any ranges and their location for the compile unit. + if (DebugRangeSymbols.size()) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel(Die, dwarf::DW_AT_GNU_ranges_base, + DwarfDebugRangeSectionSym); + else + NewCU->addUInt(Die, dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_data4, + 0); + } + SkeletonHolder.addUnit(NewCU); SkeletonCUs.push_back(NewCU); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 24f758d..cebac39 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -41,7 +41,6 @@ class DIEAbbrev; class DIE; class DIEBlock; class DIEEntry; -class DwarfDebug; //===----------------------------------------------------------------------===// /// \brief This class is used to record source line correspondence. @@ -63,13 +62,12 @@ public: /// \brief This struct describes location entries emitted in the .debug_loc /// section. -typedef struct DotDebugLocEntry { +class DotDebugLocEntry { + // Begin and end symbols for the address range that this location is valid. const MCSymbol *Begin; const MCSymbol *End; - MachineLocation Loc; - const MDNode *Variable; - bool Merged; - bool Constant; + + // Type of entry that this represents. 
enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt } EntryKind; union { int64_t Int; const ConstantFP *CFP; const ConstantInt *CIP; } Constants; - DotDebugLocEntry() - : Begin(0), End(0), Variable(0), Merged(false), - Constant(false) { Constants.Int = 0;} + + // The location in the machine frame. + MachineLocation Loc; + + // The variable to which this location entry corresponds. + const MDNode *Variable; + + // Whether this location has been merged. + bool Merged; + +public: + DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) { + Constants.Int = 0; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, const MDNode *V) - : Begin(B), End(E), Loc(L), Variable(V), Merged(false), - Constant(false) { Constants.Int = 0; EntryKind = E_Location; } + : Begin(B), End(E), Loc(L), Variable(V), Merged(false) { + Constants.Int = 0; + EntryKind = E_Location; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i) - : Begin(B), End(E), Variable(0), Merged(false), - Constant(true) { Constants.Int = i; EntryKind = E_Integer; } + : Begin(B), End(E), Variable(0), Merged(false) { + Constants.Int = i; + EntryKind = E_Integer; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr) - : Begin(B), End(E), Variable(0), Merged(false), - Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; } + : Begin(B), End(E), Variable(0), Merged(false) { + Constants.CFP = FPtr; + EntryKind = E_ConstantFP; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr) - : Begin(B), End(E), Variable(0), Merged(false), - Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; } + : Begin(B), End(E), Variable(0), Merged(false) { + Constants.CIP = IPtr; + EntryKind = E_ConstantInt; + } /// \brief Empty entries are also used as a trigger to emit a temp label. Such /// labels are referenced to find the debug_loc offset for a given DIE. @@ -115,10 +132,14 @@ typedef struct DotDebugLocEntry { bool isInt() const { return EntryKind == E_Integer; } bool isConstantFP() const { return EntryKind == E_ConstantFP; } bool isConstantInt() const { return EntryKind == E_ConstantInt; } - int64_t getInt() { return Constants.Int; } - const ConstantFP *getConstantFP() { return Constants.CFP; } - const ConstantInt *getConstantInt() { return Constants.CIP; } -} DotDebugLocEntry; + int64_t getInt() const { return Constants.Int; } + const ConstantFP *getConstantFP() const { return Constants.CFP; } + const ConstantInt *getConstantInt() const { return Constants.CIP; } + const MDNode *getVariable() const { return Variable; } + const MCSymbol *getBeginSym() const { return Begin; } + const MCSymbol *getEndSym() const { return End; } + MachineLocation getLoc() const { return Loc; } +}; //===----------------------------------------------------------------------===// /// \brief This class is used to track local variable information. @@ -129,11 +150,12 @@ class DbgVariable { DbgVariable *AbsVar; // Corresponding Abstract variable, if any. const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. int FrameIndex; + DwarfDebug *DD; public: // AbsVar may be NULL. - DbgVariable(DIVariable V, DbgVariable *AV) + DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD) : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0), - FrameIndex(~0) {} + FrameIndex(~0), DD(DD) {} // Accessors.
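
The net effect of the DotDebugLocEntry rewrite above: the redundant Constant flag is gone (EntryKind already distinguishes constants from locations), the fields are private, and const accessors expose them; DbgVariable similarly grows a back-pointer to DwarfDebug. The tagged-union pattern in miniature (stand-in types, not the LLVM classes):

    #include <cstdint>

    class LocEntry {
    public:
      enum EntryType { E_Location, E_Integer };
    private:
      EntryType Kind;
      union { int64_t Int; } Constants; // payload for constant entries
      unsigned Reg;                     // stand-in for MachineLocation
    public:
      explicit LocEntry(int64_t I) : Kind(E_Integer), Reg(0) {
        Constants.Int = I;
      }
      explicit LocEntry(unsigned R) : Kind(E_Location), Reg(R) {
        Constants.Int = 0;
      }
      bool isInt() const { return Kind == E_Integer; }
      int64_t getInt() const { return Constants.Int; } // valid if isInt()
      bool isLocation() const { return Kind == E_Location; }
      unsigned getLoc() const { return Reg; }
    };
    // Usage: LocEntry C(int64_t(42)); C.isInt() is true, C.getInt() is 42.
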
DIVariable getVariable() const { return Var; } @@ -148,7 +170,7 @@ public: int getFrameIndex() const { return FrameIndex; } void setFrameIndex(int FI) { FrameIndex = FI; } // Translate tag to proper Dwarf tag. - unsigned getTag() const { + uint16_t getTag() const { if (Var.getTag() == dwarf::DW_TAG_arg_variable) return dwarf::DW_TAG_formal_parameter; @@ -172,32 +194,27 @@ public: } bool variableHasComplexAddress() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); + assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.hasComplexAddress(); } bool isBlockByrefVariable() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); + assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.isBlockByrefVariable(); } unsigned getNumAddrElements() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); + assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.getNumAddrElements(); } uint64_t getAddrElement(unsigned i) const { return Var.getAddrElement(i); } DIType getType() const; -}; - -// A String->Symbol mapping of strings used by indirect -// references. -typedef StringMap<std::pair<MCSymbol*, unsigned>, - BumpPtrAllocator&> StrPool; - -// A Symbol->pair<Symbol, unsigned> mapping of addresses used by indirect -// references. -typedef DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> > AddrPool; +private: + /// resolve - Look in the DwarfDebug map for the MDNode that + /// corresponds to the reference. + template <typename T> T resolve(DIRef<T> Ref) const; +}; /// \brief Collects and handles information specific to a particular /// collection of units. @@ -209,27 +226,34 @@ class DwarfUnits { FoldingSet<DIEAbbrev> *AbbreviationsSet; // A list of all the unique abbreviations in use. - std::vector<DIEAbbrev *> *Abbreviations; + std::vector<DIEAbbrev *> &Abbreviations; // A pointer to all units in the section. SmallVector<CompileUnit *, 1> CUs; // Collection of strings for this unit and assorted symbols. + // A String->Symbol mapping of strings used by indirect + // references. + typedef StringMap<std::pair<MCSymbol*, unsigned>, + BumpPtrAllocator&> StrPool; StrPool StringPool; unsigned NextStringPoolNumber; std::string StringPref; // Collection of addresses for this unit and assorted labels. + // A Symbol->unsigned mapping of addresses used by indirect + // references. + typedef DenseMap<const MCExpr *, unsigned> AddrPool; AddrPool AddressPool; unsigned NextAddrPoolNumber; public: DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS, - std::vector<DIEAbbrev *> *A, const char *Pref, - BumpPtrAllocator &DA) : - Asm(AP), AbbreviationsSet(AS), Abbreviations(A), - StringPool(DA), NextStringPoolNumber(0), StringPref(Pref), - AddressPool(), NextAddrPoolNumber(0) {} + std::vector<DIEAbbrev *> &A, const char *Pref, + BumpPtrAllocator &DA) + : Asm(AP), AbbreviationsSet(AS), Abbreviations(A), StringPool(DA), + NextStringPoolNumber(0), StringPref(Pref), AddressPool(), + NextAddrPoolNumber(0) {} /// \brief Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); @@ -245,14 +269,15 @@ public: /// \brief Emit all of the units to the section listed with the given /// abbreviation section. - void emitUnits(DwarfDebug *, const MCSection *, const MCSection *, - const MCSymbol *); + void emitUnits(DwarfDebug *DD, const MCSection *USection, + const MCSection *ASection, const MCSymbol *ASectionSym); /// \brief Emit all of the strings to the section given. 
- void emitStrings(const MCSection *, const MCSection *, const MCSymbol *); + void emitStrings(const MCSection *StrSection, const MCSection *OffsetSection, + const MCSymbol *StrSecSym); /// \brief Emit all of the addresses to the section given. - void emitAddresses(const MCSection *); + void emitAddresses(const MCSection *AddrSection); /// \brief Returns the entry into the start of the pool. MCSymbol *getStringPoolSym(); @@ -270,14 +295,18 @@ public: /// \brief Returns the index into the address pool with the given /// label/symbol. - unsigned getAddrPoolIndex(MCSymbol *); + unsigned getAddrPoolIndex(const MCExpr *Sym); + unsigned getAddrPoolIndex(const MCSymbol *Sym); /// \brief Returns the address pool. AddrPool *getAddrPool() { return &AddressPool; } +}; - /// \brief for a given compile unit DIE, returns offset from beginning of - /// debug info. - unsigned getCUOffset(DIE *Die); +/// \brief Helper used to pair up a symbol and its DWARF compile unit. +struct SymbolCU { + SymbolCU(CompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} + const MCSymbol *Sym; + CompileUnit *CU; }; /// \brief Collects and handles dwarf debug information. @@ -291,10 +320,7 @@ class DwarfDebug { // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - //===--------------------------------------------------------------------===// - // Attribute used to construct specific Dwarf sections. - // - + // Handle to a compile unit used for the inline extension handling. CompileUnit *FirstCU; // Maps MDNode with its corresponding CompileUnit. @@ -303,6 +329,14 @@ class DwarfDebug { // Maps subprogram MDNode with its corresponding CompileUnit. DenseMap <const MDNode *, CompileUnit *> SPMap; + // Maps a CU DIE with its corresponding CompileUnit. + DenseMap <const DIE *, CompileUnit *> CUDieMap; + + /// Maps MDNodes for the type system with the corresponding DIEs. These DIEs can + /// be shared across CUs, that is why we keep the map here instead + /// of in CompileUnit. + DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap; + // Used to uniquely define abbreviations. FoldingSet<DIEAbbrev> AbbreviationsSet; @@ -315,10 +349,17 @@ class DwarfDebug { // separated by a zero byte, mapped to a unique id. StringMap<unsigned, BumpPtrAllocator&> SourceIdMap; + // List of all labels used in aranges generation. + std::vector<SymbolCU> ArangeLabels; + + // Size of each symbol emitted (for those symbols that have a specific size). + DenseMap <const MCSymbol *, uint64_t> SymSize; + // Provides a unique id per text section. - SetVector<const MCSection*> SectionMap; + typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType; + SectionMapType SectionMap; - // List of Arguments (DbgValues) for current function. + // List of arguments for current function. SmallVector<DbgVariable *, 8> CurrentFnArguments; LexicalScopes LScopes; @@ -327,7 +368,9 @@ class DwarfDebug { DenseMap<const MDNode *, DIE *> AbstractSPDies; // Collection of dbg variables of a scope. - DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables; + typedef DenseMap<LexicalScope *, + SmallVector<DbgVariable *, 8> > ScopeVariablesMap; + ScopeVariablesMap ScopeVariables; // Collection of abstract variables. DenseMap<const MDNode *, DbgVariable *> AbstractVariables; @@ -339,12 +382,6 @@ class DwarfDebug { // as DW_AT_inline. SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; - // Keep track of inlined functions and their location. This - // information is used to populate the debug_inlined section.
- typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; - DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo; - SmallVector<const MDNode *, 4> InlinedSPNodes; - // This is a collection of subprogram MDNodes that are processed to // create DIEs. SmallPtrSet<const MDNode *, 16> ProcessedSPNodes; @@ -377,16 +414,6 @@ class DwarfDebug { // body. DebugLoc PrologEndLoc; - struct FunctionDebugFrameInfo { - unsigned Number; - std::vector<MachineMove> Moves; - - FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M) - : Number(Num), Moves(M) {} - }; - - std::vector<FunctionDebugFrameInfo> DebugFrames; - // Section Symbols: these are assembler temporary labels that are emitted at // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. @@ -395,9 +422,10 @@ class DwarfDebug { MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; + MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; // As an optimization, there is no need to emit an entry in the directory - // table for the same directory as DW_at_comp_dir. + // table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; // Counter for assigning globally unique IDs for CUs. @@ -409,8 +437,19 @@ class DwarfDebug { // Holders for the various debug information flags that we might need to // have exposed. See accessor functions below for description. - // Whether or not we're emitting info for older versions of gdb on darwin. - bool IsDarwinGDBCompat; + // Holder for imported entities. + typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> + ImportedEntityMap; + ImportedEntityMap ScopesWithImportedEntities; + + // Holder for types that are going to be extracted out into a type unit. + std::vector<DIE *> TypeUnits; + + // Whether to emit the pubnames/pubtypes sections. + bool HasDwarfPubSections; + + // Version of dwarf we're emitting. + unsigned DwarfVersion; // DWARF5 Experimental Options bool HasDwarfAccelTables; @@ -433,9 +472,8 @@ class DwarfDebug { // Holder for the skeleton information. DwarfUnits SkeletonHolder; - typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> - ImportedEntityMap; - ImportedEntityMap ScopesWithImportedEntities; + // Maps from a type identifier to the actual MDNode. + DITypeIdentifierMap TypeIdentifierMap; private: @@ -448,11 +486,14 @@ private: /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global /// variables in this scope then create and insert DIEs for these /// variables. - DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode); + DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP); /// \brief Construct new DW_TAG_lexical_block for this scope and /// attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + /// A helper function to check whether the DIE for a given Scope is going + /// to be null. + bool isLexicalScopeDIENull(LexicalScope *Scope); /// \brief This scope represents inlined body of a function. Construct /// DIE to represent this concrete inlined copy of the function. @@ -460,6 +501,9 @@ private: /// \brief Construct a DIE for this scope. DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + /// A helper function to create children of a Scope DIE. 
+ DIE *createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, + SmallVectorImpl<DIE*> &Children); /// \brief Emit initial Dwarf sections with a label at the start of each one. void emitSectionLabels(); @@ -511,10 +555,16 @@ private: void emitAccelTypes(); /// \brief Emit visible names into a debug pubnames section. - void emitDebugPubnames(); + /// \param GnuStyle determines whether or not we want to emit + /// additional information into the table ala newer gcc for gdb + /// index. + void emitDebugPubNames(bool GnuStyle = false); /// \brief Emit visible types into a debug pubtypes section. - void emitDebugPubTypes(); + /// \param GnuStyle determines whether or not we want to emit + /// additional information into the table ala newer gcc for gdb + /// index. + void emitDebugPubTypes(bool GnuStyle = false); /// \brief Emit visible names into a debug str section. void emitDebugStr(); @@ -538,7 +588,7 @@ private: /// \brief Construct the split debug info compile unit for the debug info /// section. - CompileUnit *constructSkeletonCU(const MDNode *); + CompileUnit *constructSkeletonCU(const CompileUnit *CU); /// \brief Emit the local split abbreviations. void emitSkeletonAbbrevs(const MCSection *); @@ -554,21 +604,21 @@ private: /// \brief Create new CompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - CompileUnit *constructCompileUnit(const MDNode *N); + CompileUnit *constructCompileUnit(DICompileUnit DIUnit); /// \brief Construct subprogram DIE. void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N); - /// \brief Construct import_module DIE. - void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N); + /// \brief Construct imported_module or imported_declaration DIE. + void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N); /// \brief Construct import_module DIE. - void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N, + void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N, DIE *Context); /// \brief Construct import_module DIE. - void constructImportedModuleDIE(CompileUnit *TheCU, - const DIImportedModule &Module, + void constructImportedEntityDIE(CompileUnit *TheCU, + const DIImportedEntity &Module, DIE *Context); /// \brief Register a source line with debug info. Returns the unique @@ -616,7 +666,13 @@ public: // Main entry points. // DwarfDebug(AsmPrinter *A, Module *M); - ~DwarfDebug(); + + void insertDIE(const MDNode *TypeMD, DIE *Die) { + MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); + } + DIE *getDIE(const MDNode *TypeMD) { + return MDTypeNodeToDieMap.lookup(TypeMD); + } /// \brief Emit all Dwarf sections that should come prior to the /// content. @@ -637,6 +693,17 @@ public: /// \brief Process end of an instruction. void endInstruction(const MachineInstr *MI); + /// \brief Add a DIE to the set of types that we're going to pull into + /// type units. + void addTypeUnitType(DIE *Die) { TypeUnits.push_back(Die); } + + /// \brief Add a label so that arange data can be generated for it. + void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } + + /// \brief For symbols that have a size designated (e.g. common symbols), + /// this tracks that size. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) { SymSize[Sym] = Size;} + /// \brief Look up the source id with the given directory and source file /// names. If none currently exists, create a new id and insert it in the /// SourceIds map. 
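
The insertDIE/getDIE pair in the next hunk exposes the new MDTypeNodeToDieMap: type DIEs are now cached per metadata node at the DwarfDebug level, precisely because (per the comment added in the header) they can be shared across CUs and so cannot live in any one CompileUnit. The map itself is nothing more than this shape (opaque stand-ins, not the LLVM types):

    #include <map>

    struct DIE_; // opaque stand-in for a DIE

    class TypeDieCache {
      std::map<const void *, DIE_ *> Map; // metadata node -> canonical DIE
    public:
      void insertDIE(const void *TypeMD, DIE_ *Die) { Map[TypeMD] = Die; }
      DIE_ *getDIE(const void *TypeMD) const {
        std::map<const void *, DIE_ *>::const_iterator I = Map.find(TypeMD);
        return I == Map.end() ? 0 : I->second;
      }
    };
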
@@ -644,11 +711,7 @@ public: unsigned CUID); /// \brief Recursively Emits a debug information entry. - void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs); - - /// \brief Returns whether or not to limit some of our debug - /// output to the limitations of darwin gdb. - bool useDarwinGDBCompat() { return IsDarwinGDBCompat; } + void emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs); // Experimental DWARF5 features. @@ -659,6 +722,19 @@ public: /// \brief Returns whether or not to change the current debug info for the /// split dwarf proposal support. bool useSplitDwarf() { return HasSplitDwarf; } + + /// Returns the Dwarf Version. + unsigned getDwarfVersion() const { return DwarfVersion; } + + /// Find the MDNode for the given reference. + template <typename T> T resolve(DIRef<T> Ref) const { + return Ref.resolve(TypeIdentifierMap); + } + + /// isSubprogramContext - Return true if Context is either a subprogram + /// or another context nested inside a subprogram. + bool isSubprogramContext(const MDNode *Context); + }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 74b1b13..1575161 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -23,13 +23,13 @@ namespace llvm { template <typename T> class SmallVectorImpl; struct LandingPadInfo; class MachineModuleInfo; -class MachineMove; class MachineInstr; class MachineFunction; class MCAsmInfo; class MCExpr; class MCSymbol; class Function; +class ARMTargetStreamer; class AsmPrinter; //===----------------------------------------------------------------------===// @@ -178,6 +178,8 @@ public: class ARMException : public DwarfException { void EmitTypeInfos(unsigned TTypeEncoding); + ARMTargetStreamer &getTargetStreamer(); + public: //===--------------------------------------------------------------------===// // Main entry points. diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index 4a99184..24aa1ab 100644 --- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -26,18 +26,20 @@ using namespace llvm; namespace { class BasicTTI : public ImmutablePass, public TargetTransformInfo { - const TargetLoweringBase *TLI; + const TargetMachine *TM; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. 
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } + public: - BasicTTI() : ImmutablePass(ID), TLI(0) { + BasicTTI() : ImmutablePass(ID), TM(0) { llvm_unreachable("This pass cannot be directly constructed"); } - BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) { + BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) { initializeBasicTTIPass(*PassRegistry::getPassRegistry()); } @@ -63,6 +65,8 @@ public: return this; } + virtual bool hasBranchDivergence() const; + /// \name Scalar TTI Implementations /// @{ @@ -71,11 +75,16 @@ public: virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) const; + virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const; virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; virtual bool isTypeLegal(Type *Ty) const; virtual unsigned getJumpBufAlignment() const; virtual unsigned getJumpBufSize() const; virtual bool shouldBuildLookupTables() const; + virtual bool haveFastSqrt(Type *Ty) const; + virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; /// @} @@ -103,7 +112,8 @@ public: virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, ArrayRef<Type*> Tys) const; virtual unsigned getNumberOfParts(Type *Tp) const; - virtual unsigned getAddressComputationCost(Type *Ty) const; + virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const; + virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) const; /// @} }; @@ -115,17 +125,18 @@ INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", char BasicTTI::ID = 0; ImmutablePass * -llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) { - return new BasicTTI(TLI); +llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) { + return new BasicTTI(TM); } +bool BasicTTI::hasBranchDivergence() const { return false; } bool BasicTTI::isLegalAddImmediate(int64_t imm) const { - return TLI->isLegalAddImmediate(imm); + return getTLI()->isLegalAddImmediate(imm); } bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { - return TLI->isLegalICmpImmediate(imm); + return getTLI()->isLegalICmpImmediate(imm); } bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, @@ -136,32 +147,52 @@ bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, AM.BaseOffs = BaseOffset; AM.HasBaseReg = HasBaseReg; AM.Scale = Scale; - return TLI->isLegalAddressingMode(AM, Ty); + return getTLI()->isLegalAddressingMode(AM, Ty); +} + +int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const { + TargetLoweringBase::AddrMode AM; + AM.BaseGV = BaseGV; + AM.BaseOffs = BaseOffset; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Scale; + return getTLI()->getScalingFactorCost(AM, Ty); } bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const { - return TLI->isTruncateFree(Ty1, Ty2); + return getTLI()->isTruncateFree(Ty1, Ty2); } bool BasicTTI::isTypeLegal(Type *Ty) const { - EVT T = TLI->getValueType(Ty); - return TLI->isTypeLegal(T); + EVT T = getTLI()->getValueType(Ty); + return getTLI()->isTypeLegal(T); } unsigned BasicTTI::getJumpBufAlignment() const { - return TLI->getJumpBufAlignment(); + return getTLI()->getJumpBufAlignment(); } unsigned BasicTTI::getJumpBufSize() const { - return 
TLI->getJumpBufSize(); + return getTLI()->getJumpBufSize(); } bool BasicTTI::shouldBuildLookupTables() const { + const TargetLoweringBase *TLI = getTLI(); return TLI->supportJumpTables() && (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } +bool BasicTTI::haveFastSqrt(Type *Ty) const { + const TargetLoweringBase *TLI = getTLI(); + EVT VT = TLI->getValueType(Ty); + return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); +} + +void BasicTTI::getUnrollingPreferences(Loop *, UnrollingPreferences &) const { } + //===----------------------------------------------------------------------===// // // Calls used by the vectorizers. @@ -199,6 +230,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, OperandValueKind) const { // Check if any of the operands are vector operands. + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -245,6 +277,7 @@ unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -338,6 +371,7 @@ unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const { + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -382,7 +416,7 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { assert(!Src->isVoidTy() && "Invalid type"); - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); // Assume that all loads of legal types cost 1. return LT.first; @@ -420,15 +454,23 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::log10: ISD = ISD::FLOG10; break; case Intrinsic::log2: ISD = ISD::FLOG2; break; case Intrinsic::fabs: ISD = ISD::FABS; break; + case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; case Intrinsic::floor: ISD = ISD::FFLOOR; break; case Intrinsic::ceil: ISD = ISD::FCEIL; break; case Intrinsic::trunc: ISD = ISD::FTRUNC; break; + case Intrinsic::nearbyint: + ISD = ISD::FNEARBYINT; break; case Intrinsic::rint: ISD = ISD::FRINT; break; + case Intrinsic::round: ISD = ISD::FROUND; break; case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? 
+ case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return 0; } + const TargetLoweringBase *TLI = getTLI(); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { @@ -462,10 +504,24 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, } unsigned BasicTTI::getNumberOfParts(Type *Tp) const { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp); return LT.first; } -unsigned BasicTTI::getAddressComputationCost(Type *Ty) const { +unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { return 0; } + +unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwise) const { + assert(Ty->isVectorTy() && "Expect a vector type"); + unsigned NumVecElts = Ty->getVectorNumElements(); + unsigned NumReduxLevels = Log2_32(NumVecElts); + unsigned ArithCost = NumReduxLevels * + TopTTI->getArithmeticInstrCost(Opcode, Ty); + // Assume the pairwise shuffles add a cost. + unsigned ShuffleCost = + NumReduxLevels * (IsPairwise + 1) * + TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty); + return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); +} diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index f8cc3b3..9cd4208 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -135,8 +135,8 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { if (!I->isImplicitDef()) break; unsigned Reg = I->getOperand(0).getReg(); - ImpDefRegs.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) ImpDefRegs.insert(*SubRegs); ++I; } @@ -406,7 +406,8 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, /// MBB so that the part before the iterator falls into the part starting at the /// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1) { + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB) { if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) return 0; @@ -414,7 +415,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Create the fall-through block. MachineFunction::iterator MBBI = &CurMBB; - MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock()); + MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); // Move all the successors of this block to the specified block. @@ -647,6 +648,7 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist /// only of the common tail. Create a block that does by splitting one. bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, unsigned &commonTailIndex) { commonTailIndex = 0; @@ -676,7 +678,12 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " << maxCommonTailLength); - MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + // If the split block unconditionally falls-thru to SuccBB, it will be + // merged. 
In control flow terms it should then take SuccBB's name. e.g. If + // SuccBB is an inner loop, the common tail is still part of the inner loop. + const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ? + SuccBB->getBasicBlock() : MBB->getBasicBlock(); + MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB); if (!newMBB) { DEBUG(dbgs() << "... failed!"); return false; @@ -784,7 +791,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - if (!CreateCommonTailOnlyBlock(PredBB, + if (!CreateCommonTailOnlyBlock(PredBB, SuccBB, maxCommonTailLength, commonTailIndex)) { RemoveBlocksWithHash(CurHash, SuccBB, PredBB); continue; diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h index df795df..0d15ed7 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -1,4 +1,4 @@ -//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===// +//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -100,13 +100,15 @@ namespace llvm { void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest); MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1); + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB); unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB); void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, unsigned &commonTailIndex); diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index 38ae17d..4925c4d 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -9,13 +9,12 @@ #define DEBUG_TYPE "calcspillweights" -#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -23,36 +22,22 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -char CalculateSpillWeights::ID = 0; -INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights", - "Calculate spill weights", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights", - "Calculate spill weights", false, false) - -void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired<LiveIntervals>(); - au.addRequired<MachineLoopInfo>(); - au.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(au); -} - -bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { - +void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, + MachineFunction &MF, + const MachineLoopInfo &MLI, + const MachineBlockFrequencyInfo &MBFI, + 
VirtRegAuxInfo::NormalizingFn norm) { DEBUG(dbgs() << "********** Compute Spill Weights **********\n" << "********** Function: " << MF.getName() << '\n'); - LiveIntervals &LIS = getAnalysis<LiveIntervals>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>()); + VirtRegAuxInfo VRAI(MF, LIS, MLI, MBFI, norm); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) continue; - VRAI.CalculateWeightAndHint(LIS.getInterval(Reg)); + VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg)); } - return false; } // Return the preferred allocation register for reg, given a COPY instruction. @@ -107,12 +92,12 @@ static bool isRematerializable(const LiveInterval &LI, return true; } -void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { +void +VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); MachineBasicBlock *mbb = 0; MachineLoop *loop = 0; - unsigned loopDepth = 0; bool isExiting = false; float totalWeight = 0; SmallPtrSet<MachineInstr*, 8> visited; @@ -140,14 +125,14 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (mi->getParent() != mbb) { mbb = mi->getParent(); loop = Loops.getLoopFor(mbb); - loopDepth = loop ? loop->getLoopDepth() : 0; isExiting = loop ? loop->isLoopExiting(mbb) : false; } // Calculate instr weight. bool reads, writes; tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); - weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); + weight = LiveIntervals::getSpillWeight( + writes, reads, MBFI.getBlockFreq(mi->getParent())); // Give extra weight to what looks like a loop induction variable update. 
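The rewrite above threads a MachineBlockFrequencyInfo into VirtRegAuxInfo so that getSpillWeight() can weight each use or def by the measured frequency of its block rather than by the old loopDepth heuristic. A standalone sketch of the difference between the two weightings; the constants and formulas are simplified for illustration and are not LLVM's exact ones:

    #include <cmath>
    #include <cstdio>

    // Old-style heuristic: weight a use/def purely by static loop depth.
    static float weightByLoopDepth(bool Reads, bool Writes, unsigned LoopDepth) {
      return (Reads + Writes) * std::pow(10.0f, static_cast<float>(LoopDepth));
    }

    // New-style heuristic: weight by the block's execution frequency relative
    // to the function entry, as a block-frequency analysis would report it.
    static float weightByBlockFreq(bool Reads, bool Writes, double RelBlockFreq) {
      return static_cast<float>((Reads + Writes) * RelBlockFreq);
    }

    int main() {
      // A block nested in two loops that profiling says runs only 4x as often
      // as entry: the depth heuristic overstates its importance by 25x.
      std::printf("depth-based: %g\n", weightByLoopDepth(true, true, 2)); // 200
      std::printf("freq-based:  %g\n", weightByBlockFreq(true, true, 4.0)); // 8
    }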
if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) @@ -198,5 +183,5 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo())) totalWeight *= 0.5F; - li.weight = normalizeSpillWeight(totalWeight, li.getSize()); + li.weight = normalize(totalWeight, li.getSize()); } diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index 75f4b96..fcfc9dc 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -24,7 +24,7 @@ using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, - const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs, + const TargetMachine &tm, SmallVectorImpl<CCValAssign> &locs, LLVMContext &C) : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm), TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index c641991..7430c53 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -22,7 +22,6 @@ using namespace llvm; void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); - initializeCalculateSpillWeightsPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandPostRAPass(Registry); @@ -60,7 +59,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackProtectorPass(Registry); initializeStackColoringPass(Registry); initializeStackSlotColoringPass(Registry); - initializeStrongPHIEliminationPass(Registry); initializeTailDuplicatePassPass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 0eb74a4..18c8e0a 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -201,8 +201,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (MO.isUse() && Special) { if (!KeepRegs.test(Reg)) { - KeepRegs.set(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) KeepRegs.set(*SubRegs); } } @@ -361,7 +361,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVector<unsigned, 2> &Forbid) + SmallVectorImpl<unsigned> &Forbid) { ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC); for (unsigned i = 0; i != Order.size(); ++i) { @@ -388,7 +388,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, continue; // If NewReg overlaps any of the forbidden registers, we can't use it. 
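Many signature changes in this revision, the Forbid parameter above included, replace SmallVector<T, N>& with SmallVectorImpl<T>&. SmallVectorImpl erases the inline-capacity template parameter, so one function accepts callers' vectors of any inline size. A small sketch of the idiom; it needs LLVM's ADT headers to build, and collectForbidden is a made-up name for the example:

    #include "llvm/ADT/SmallVector.h"

    // Capacity is not part of SmallVectorImpl's type, so this one signature
    // serves every SmallVector<unsigned, N>.
    static void collectForbidden(llvm::SmallVectorImpl<unsigned> &Forbid) {
      Forbid.push_back(1);
      Forbid.push_back(42);
    }

    int main() {
      llvm::SmallVector<unsigned, 2> A;
      llvm::SmallVector<unsigned, 8> B;
      collectForbidden(A); // both calls compile: the inline size belongs to
      collectForbidden(B); // the caller's storage, not to the parameter type
      return A.size() == B.size() ? 0 : 1;
    }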
bool Forbidden = false; - for (SmallVector<unsigned, 2>::iterator it = Forbid.begin(), + for (SmallVectorImpl<unsigned>::iterator it = Forbid.begin(), ite = Forbid.end(); it != ite; ++it) if (TRI->regsOverlap(NewReg, *it)) { Forbidden = true; diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h index df13dd3..565d20b 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h @@ -103,7 +103,7 @@ class TargetRegisterInfo; unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVector<unsigned, 2> &Forbid); + SmallVectorImpl<unsigned> &Forbid); }; } diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp index 840a101..6619bcf 100644 --- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -160,7 +160,8 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator EndItr) { assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); VLIWScheduler->startBlock(MBB); - VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size()); + VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, + std::distance(BeginItr, EndItr)); VLIWScheduler->schedule(); // Generate MI -> SU map. diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index a54217f..5efe1ff 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -154,11 +154,11 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - LivePhysRegs.reset(Reg); // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. - for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR) + for (MCSubRegIterator SR(Reg, TRI,/*IncludeSelf=*/true); + SR.isValid(); ++SR) LivePhysRegs.reset(*SR); } } else if (MO.isRegMask()) { diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index f27ec77..c7c1752 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -33,7 +33,6 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { const TargetMachine *TM; - const TargetLoweringBase *TLI; // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; @@ -43,9 +42,8 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. 
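The DFAPacketizer change above fixes the region size passed to enterRegion(): a scheduling region is the half-open range [BeginItr, EndItr), which can be a strict sub-range of the block, so its length is std::distance(BeginItr, EndItr), not MBB->size(). The same point in a self-contained form:

    #include <cassert>
    #include <iterator>
    #include <list>

    int main() {
      // The "block" has six instructions, but the region starts at the third.
      std::list<int> Block = {0, 1, 2, 3, 4, 5};
      auto Begin = std::next(Block.begin(), 2);
      auto End   = Block.end();
      assert(std::distance(Begin, End) == 4); // correct region size
      assert(Block.size() == 6u);             // what the old code passed
    }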
- DwarfEHPrepare(const TargetMachine *tm) : - FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), - RewindFunction(0) { + DwarfEHPrepare(const TargetMachine *TM) : + FunctionPass(ID), TM(TM), RewindFunction(0) { initializeDominatorTreePass(*PassRegistry::getPassRegistry()); } @@ -61,8 +59,8 @@ namespace { char DwarfEHPrepare::ID = 0; -FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { - return new DwarfEHPrepare(tm); +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { + return new DwarfEHPrepare(TM); } /// GetExceptionObject - Return the exception object from the value passed into @@ -108,20 +106,18 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { /// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { - bool UsesNewEH = false; SmallVector<ResumeInst*, 16> Resumes; for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (ResumeInst *RI = dyn_cast<ResumeInst>(TI)) Resumes.push_back(RI); - else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) - UsesNewEH = II->getUnwindDest()->isLandingPad(); } if (Resumes.empty()) - return UsesNewEH; + return false; // Find the rewind function if we didn't already. + const TargetLowering *TLI = TM->getTargetLowering(); if (!RewindFunction) { LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index 9b0e76f..031f19c 100644 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -23,6 +23,7 @@ #define DEBUG_TYPE "execution-fix" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Allocator.h" @@ -91,7 +92,7 @@ struct DomainValue { // First domain available. unsigned getFirstDomain() const { - return CountTrailingZeros_32(AvailableDomains); + return countTrailingZeros(AvailableDomains); } DomainValue() : Refs(0) { clear(); } @@ -136,6 +137,12 @@ class ExeDepsFix : public MachineFunctionPass { typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap; LiveOutMap LiveOuts; + /// List of undefined register reads in this block in forward order. + std::vector<std::pair<MachineInstr*, unsigned> > UndefReads; + + /// Storage for register unit liveness. + LiveRegUnits LiveUnits; + /// Current instruction number. /// The first instruction in each basic block is 0. int CurInstr; @@ -185,6 +192,8 @@ private: void processDefs(MachineInstr*, bool Kill); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); + bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); + void processUndefReads(MachineBasicBlock*); }; } @@ -341,6 +350,10 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Reset instruction counter in each basic block. CurInstr = 0; + // Set up UndefReads to track undefined register reads. + UndefReads.clear(); + LiveUnits.clear(); + // Set up LiveRegs to represent registers entering MBB. 
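A recurring mechanical change in this import, visible in DomainValue::getFirstDomain() above, renames CountTrailingZeros_32(x) to countTrailingZeros(x): both return the index of the lowest set bit, here the first available execution domain. A portable standalone model of that computation (the real LLVM routine compiles down to a single instruction on most targets):

    #include <cassert>
    #include <cstdint>

    // Index of the lowest set bit; Mask must be non-zero, mirroring the
    // precondition of countTrailingZeros in its default mode.
    static unsigned firstSetBit(uint32_t Mask) {
      unsigned N = 0;
      while (!(Mask & 1)) {
        Mask >>= 1;
        ++N;
      }
      return N;
    }

    int main() {
      assert(firstSetBit(0xA) == 1); // domains 1 and 3 available: pick 1
      assert(firstSetBit(0x1) == 0);
    }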
if (!LiveRegs) LiveRegs = new LiveReg[NumRegs]; @@ -448,10 +461,46 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { processDefs(MI, !DomP.first); } +/// \brief Return true if it makes sense to break dependence on a partial def +/// or undef use. +bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, + unsigned Pref) { + int rx = regIndex(MI->getOperand(OpIdx).getReg()); + if (rx < 0) + return false; + + unsigned Clearance = CurInstr - LiveRegs[rx].Def; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + return true; + } + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK.\n"); + return false; + } + // A def from an unprocessed back-edge may make us break this dependency. + DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); + return false; +} + // Update def-ages for registers defined by MI. // If Kill is set, also kill off DomainValues clobbered by the defs. +// +// Also break dependencies on partial defs and undef uses. void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { assert(!MI->isDebugValue() && "Won't process debug values"); + + // Break dependence on undef uses. Do this before updating LiveRegs below. + unsigned OpNum; + unsigned Pref = TII->getUndefRegClearance(MI, OpNum, TRI); + if (Pref) { + if (shouldBreakDependence(MI, OpNum, Pref)) + UndefReads.push_back(std::make_pair(MI, OpNum)); + } const MCInstrDesc &MCID = MI->getDesc(); for (unsigned i = 0, e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); @@ -471,37 +520,58 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr << '\t' << *MI); + // Check clearance before partial register updates. + // Call breakDependence before setting LiveRegs[rx].Def. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (Pref && shouldBreakDependence(MI, i, Pref)) + TII->breakPartialRegDependency(MI, i, TRI); + // How many instructions since rx was last written? - unsigned Clearance = CurInstr - LiveRegs[rx].Def; LiveRegs[rx].Def = CurInstr; // Kill off domains redefined by generic instructions. if (Kill) kill(rx); + } + ++CurInstr; +} - // Verify clearance before partial register updates. - unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); - if (!Pref) - continue; - DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); - if (Pref > Clearance) { - DEBUG(dbgs() << ": Break dependency.\n"); - TII->breakPartialRegDependency(MI, i, TRI); - continue; - } - - // The current clearance seems OK, but we may be ignoring a def from a - // back-edge. - if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { - DEBUG(dbgs() << ": OK.\n"); - continue; - } +/// \brief Break false dependencies on undefined register reads. +/// +/// Walk the block backward computing precise liveness. This is expensive, so we +/// only do it on demand. Note that the occurrence of undefined register reads +/// that should be broken is very rare, but when they occur we may have many in +/// a single block. +void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { + if (UndefReads.empty()) + return; - // Collect this block's live out register units. 
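The new shouldBreakDependence() above factors the clearance test out of processDefs(): a register's clearance is the number of instructions since its last def, and when the target's preferred clearance (from getPartialRegUpdateClearance or getUndefRegClearance) exceeds it, the false dependency is worth breaking. A toy standalone model of just that bookkeeping, ignoring the back-edge caveat the real code handles:

    #include <cassert>

    struct ClearanceTracker {
      int LastDef[8] = {0}; // last-def instruction number per register
      int CurInstr = 0;

      void def(int Reg) { LastDef[Reg] = CurInstr; }
      void advance() { ++CurInstr; }
      // Break the dependency when the target wants more clearance than exists.
      bool shouldBreak(int Reg, unsigned Pref) const {
        unsigned Clearance = CurInstr - LastDef[Reg];
        return Pref > Clearance;
      }
    };

    int main() {
      ClearanceTracker T;
      T.def(3);                     // reg 3 written at instruction 0
      for (int i = 0; i < 10; ++i)
        T.advance();
      assert(!T.shouldBreak(3, 5)); // 10 instructions of clearance suffice
      assert(T.shouldBreak(3, 16)); // a preference of 16 is not met: break
    }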
+ LiveUnits.init(TRI); + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + LiveUnits.addLiveIns(*SI, *TRI); } + MachineInstr *UndefMI = UndefReads.back().first; + unsigned OpIdx = UndefReads.back().second; - ++CurInstr; + for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend(); + I != E; ++I) { + // Update liveness, including the current instruction's defs. + LiveUnits.stepBackward(*I, *TRI); + + if (UndefMI == &*I) { + if (!LiveUnits.contains(UndefMI->getOperand(OpIdx).getReg(), *TRI)) + TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); + + UndefReads.pop_back(); + if (UndefReads.empty()) + return; + + UndefMI = UndefReads.back().first; + OpIdx = UndefReads.back().second; + } + } } // A hard instruction only works in one domain. All input registers will be @@ -549,7 +619,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Is it possible to use this collapsed register for free? if (dv->isCollapsed()) { // Restrict available domains to the ones in common with the operand. - // If there are no common domains, we must pay the cross-domain + // If there are no common domains, we must pay the cross-domain // penalty for this operand. if (common) available = common; } else if (common) @@ -564,7 +634,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // If the collapsed operands force a single domain, propagate the collapse. if (isPowerOf2_32(available)) { - unsigned domain = CountTrailingZeros_32(available); + unsigned domain = countTrailingZeros(available); TII->setExecutionDomain(mi, domain); visitHardInstr(mi, domain); return; @@ -573,7 +643,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. SmallVector<LiveReg, 4> Regs; - for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { + for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. @@ -583,7 +653,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { } // Sorted insertion. bool Inserted = false; - for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end(); + for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end(); i != e && !Inserted; ++i) { if (LR.Def < i->Def) { Inserted = true; @@ -614,7 +684,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { continue; // If latest didn't merge, it is useless now. Kill all registers using it. 
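processUndefReads() defers the actual dependency break until the block's precise liveness is known: it seeds liveness from the successors' live-ins and steps backward, breaking the dependency only if the register is dead at the undef read. A self-contained miniature of that backward walk, with plain ints standing in for registers and instructions:

    #include <cassert>
    #include <set>
    #include <vector>

    struct Instr {
      int Def;               // register defined (-1: none)
      std::vector<int> Uses; // registers read
    };

    int main() {
      // instr 0 reads r1 as undef; instr 1 redefines r1; instr 2 reads r0, r1.
      std::vector<Instr> Block = {{0, {1}}, {1, {}}, {-1, {0, 1}}};
      std::set<int> Live = {0}; // r0 is live out of the block

      // Step backward to the liveness just below instruction 0.
      for (int I = (int)Block.size() - 1; I >= 1; --I) {
        if (Block[I].Def >= 0)
          Live.erase(Block[I].Def); // a def ends the live range
        for (int U : Block[I].Uses)
          Live.insert(U);           // a use extends it upward
      }

      // r1 is dead below instruction 0, so its undef read may be broken
      // (e.g. by zeroing r1 first) without clobbering a needed value.
      assert(!Live.count(1));
      assert(Live.count(0));        // r0, by contrast, is still live
    }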
- for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) + for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) if (LiveRegs[*i].Value == Latest) kill(*i); } @@ -686,6 +756,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) visitInstr(I); + processUndefReads(MBB); leaveBasicBlock(MBB); } @@ -698,6 +769,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { ++I) if (!I->isDebugValue()) processDefs(I, false); + processUndefReads(MBB); leaveBasicBlock(MBB); } @@ -713,6 +785,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { delete[] FI->second; } LiveOuts.clear(); + UndefReads.clear(); Avail.clear(); Allocator.DestroyAll(); diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 1611db8..6c73fff 100644 --- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -104,7 +104,7 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } if (DstSubReg == InsReg) { - // No need to insert an identify copy instruction. + // No need to insert an identity copy instruction. // Watch out for case like this: // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3 // We must leave %RAX live. diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index 8264d6d..e2d0eb4 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -22,6 +22,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -31,6 +33,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; // Hidden options for help debugging. @@ -150,14 +154,17 @@ namespace { /// BBAnalysis - Results of if-conversion feasibility analysis indexed by /// basic block number. 
std::vector<BBInfo> BBAnalysis; + TargetSchedModel SchedModel; const TargetLoweringBase *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - const InstrItineraryData *InstrItins; const MachineBranchProbabilityInfo *MBPI; MachineRegisterInfo *MRI; + LiveRegUnits Redefs; + LiveRegUnits DontKill; + bool PreRegAlloc; bool MadeChange; int FnNum; @@ -198,11 +205,9 @@ namespace { void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, SmallSet<unsigned, 4> *LaterRedefs = 0); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, bool IgnoreBr = false); void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); @@ -267,7 +272,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MRI = &MF.getRegInfo(); - InstrItins = MF.getTarget().getInstrItineraryData(); + + const TargetSubtargetInfo &ST = + MF.getTarget().getSubtarget<TargetSubtargetInfo>(); + SchedModel.init(*ST.getSchedModel(), &ST, TII); + if (!TII) return false; PreRegAlloc = MRI->isSSA(); @@ -666,32 +675,28 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool isPredicated = TII->isPredicated(I); bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); - if (!isCondBr) { - if (!isPredicated) { - BBI.NonPredSize++; - unsigned ExtraPredCost = 0; - unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, - &ExtraPredCost); - if (NumCycles > 1) - BBI.ExtraCost += NumCycles-1; - BBI.ExtraCost2 += ExtraPredCost; - } else if (!AlreadyPredicated) { - // FIXME: This instruction is already predicated before the - // if-conversion pass. It's probably something like a conditional move. - // Mark this block unpredicable for now. - BBI.IsUnpredicable = true; - return; - } + // A conditional branch is not predicable, but it may be eliminated. + if (isCondBr) + continue; + + if (!isPredicated) { + BBI.NonPredSize++; + unsigned ExtraPredCost = TII->getPredicationCost(&*I); + unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; + } else if (!AlreadyPredicated) { + // FIXME: This instruction is already predicated before the + // if-conversion pass. It's probably something like a conditional move. + // Mark this block unpredicable for now. + BBI.IsUnpredicable = true; + return; } if (BBI.ClobbersPred && !isPredicated) { // Predicate modification instruction should end the block (except for // already predicated instructions and end of block branches). - if (isCondBr) { - // A conditional branch is not predicable, but it may be eliminated. - continue; - } - // Predicate may have been modified, the subsequent (currently) // unpredicated instructions cannot be correctly predicated. BBI.IsUnpredicable = true; @@ -720,9 +725,9 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, if (BBI.IsDone || BBI.IsUnpredicable) return false; - // If it is already predicated, check if its predicate subsumes the new - // predicate. - if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred)) + // If it is already predicated, check if the new predicate subsumes + // its predicate. 
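The FeasibilityAnalysis fix above swaps the arguments to SubsumesPredicate(): the test must be that the incoming predicate subsumes the block's existing one, and TII->SubsumesPredicate(A, B) asks whether A subsumes B, so the order matters. A toy model of subsumption as set containment; the ARM-flavored condition names are purely illustrative:

    #include <cassert>
    #include <set>
    #include <string>

    // A predicate is modeled as the set of condition states under which an
    // instruction executes; A subsumes B when every state B allows, A allows.
    using Pred = std::set<std::string>;

    static bool subsumes(const Pred &A, const Pred &B) {
      for (const auto &S : B)
        if (!A.count(S))
          return false; // B allows a state that A does not
      return true;
    }

    int main() {
      Pred GT = {"gt"};
      Pred GE = {"gt", "eq"};
      assert(subsumes(GE, GT));  // GE subsumes GT ...
      assert(!subsumes(GT, GE)); // ... but not vice versa, hence the
                                 // corrected argument order in the call
    }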
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) return false; if (BBI.BrCond.size()) { @@ -961,64 +966,58 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); } -/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are -/// modeled as read + write (sort like two-address instructions). These -/// routines track register liveness and add implicit uses to if-converted -/// instructions to conform to the model. -static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs, - const TargetRegisterInfo *TRI) { - for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), - E = BB->livein_end(); I != E; ++I) { - unsigned Reg = *I; - Redefs.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Redefs.insert(*SubRegs); - } -} - -static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, - const TargetRegisterInfo *TRI, - bool AddImpUse = false) { - SmallVector<unsigned, 4> Defs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) +/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all +/// values defined in MI which are not live/used by MI. +static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs, + const TargetRegisterInfo *TRI) { + for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { + if (!Ops->isReg() || !Ops->isKill()) continue; - unsigned Reg = MO.getReg(); - if (!Reg) + unsigned Reg = Ops->getReg(); + if (Reg == 0) continue; - if (MO.isDef()) - Defs.push_back(Reg); - else if (MO.isKill()) { - Redefs.erase(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Redefs.erase(*SubRegs); - } + Redefs.removeReg(Reg, *TRI); } - MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Reg = Defs[i]; - if (!Redefs.insert(Reg)) { - if (AddImpUse) - // Treat predicated update as read + write. - MIB.addReg(Reg, RegState::Implicit | RegState::Undef); - } else { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Redefs.insert(*SubRegs); - } + for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { + if (!Ops->isReg() || !Ops->isDef()) + continue; + unsigned Reg = Ops->getReg(); + if (Reg == 0 || Redefs.contains(Reg, *TRI)) + continue; + Redefs.addReg(Reg, *TRI); + + MachineOperand &Op = *Ops; + MachineInstr *MI = Op.getParent(); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); + MIB.addReg(Reg, RegState::Implicit | RegState::Undef); } } -static void UpdatePredRedefs(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator E, - SmallSet<unsigned,4> &Redefs, - const TargetRegisterInfo *TRI) { - while (I != E) { - UpdatePredRedefs(I, Redefs, TRI); - ++I; +/** + * Remove kill flags from operands with a register in the @p DontKill set. + */ +static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill, + const MCRegisterInfo &MCRI) { + for (MIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->isKill()) + continue; + if (DontKill.contains(O->getReg(), MCRI)) + O->setIsKill(false); + } } +/** + * Walks a range of machine instructions and removes kill flags for registers + * in the @p DontKill set. 
+ */ +static void RemoveKills(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + const LiveRegUnits &DontKill, + const MCRegisterInfo &MCRI) { + for ( ; I != E; ++I) + RemoveKills(*I, DontKill, MCRI); +} + /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. /// bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { @@ -1049,21 +1048,27 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentiall redefined by // predicated instructions. - SmallSet<unsigned, 4> Redefs; - InitPredRedefs(CvtBBI->BB, Redefs, TRI); - InitPredRedefs(NextBBI->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(CvtBBI->BB, *TRI); + Redefs.addLiveIns(NextBBI->BB, *TRI); + + // Compute a set of registers which must not be killed by instructions in + // BB1: This is everything live-in to BB2. + DontKill.init(TRI); + DontKill.addLiveIns(NextBBI->BB, *TRI); if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. BBI.BB->removeSuccessor(CvtBBI->BB); } else { - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); + RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); // Merge converted block into entry block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1148,16 +1153,18 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. - SmallSet<unsigned, 4> Redefs; - InitPredRedefs(CvtBBI->BB, Redefs, TRI); - InitPredRedefs(NextBBI->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(CvtBBI->BB, *TRI); + Redefs.addLiveIns(NextBBI->BB, *TRI); + + DontKill.clear(); bool HasEarlyExit = CvtBBI->FalseBB != NULL; if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. @@ -1165,7 +1172,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); // Now merge the entry of the triangle with the true block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1276,8 +1283,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. - SmallSet<unsigned, 4> Redefs; - InitPredRedefs(BBI1->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(BBI1->BB, *TRI); // Remove the duplicated instructions at the beginnings of both paths. 
MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); @@ -1304,7 +1311,19 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, --NumDups1; } - UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI); + // Compute a set of registers which must not be killed by instructions in BB1: + // This is everything used+live in BB2 after the duplicated instructions. We + // can compute this set by simulating liveness backwards from the end of BB2. + DontKill.init(TRI); + for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(), + E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) { + DontKill.stepBackward(*I, *TRI); + } + + for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; + ++I) { + Redefs.stepForward(*I, *TRI); + } BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); @@ -1322,6 +1341,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } BBI1->BB->erase(DI1, BBI1->BB->end()); + // Kill flags in the true block for registers living into the false block + // must be removed. + RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); + // Remove 'false' block branch and find the last instruction to predicate. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); @@ -1362,8 +1385,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } else if (!RedefsByFalse.count(Reg)) { // These are defined before ctrl flow reaches the 'false' instructions. // They cannot be modified by the 'true' instructions. - ExtUses.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) ExtUses.insert(*SubRegs); } } @@ -1371,8 +1394,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Reg = Defs[i]; if (!ExtUses.count(Reg)) { - RedefsByFalse.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) RedefsByFalse.insert(*SubRegs); } } @@ -1380,10 +1403,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } // Predicate the 'true' block. - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse); + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse); // Predicate the 'false' block. - PredicateBlock(*BBI2, DI2, *Cond2, Redefs); + PredicateBlock(*BBI2, DI2, *Cond2); // Merge the true block into the entry of the diamond. MergeBlocks(BBI, *BBI1, TailBB == 0); @@ -1458,7 +1481,6 @@ static bool MaySpeculate(const MachineInstr *MI, void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, SmallSet<unsigned, 4> *LaterRedefs) { bool AnyUnpred = false; bool MaySpec = LaterRedefs != 0; @@ -1484,7 +1506,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(I, Redefs, TRI, true); + UpdatePredRedefs(I, Redefs, TRI); } std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); @@ -1501,7 +1523,6 @@ void IfConverter::PredicateBlock(BBInfo &BBI, /// the destination block. Skip end of block branches if IgnoreBr is true. 
void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, bool IgnoreBr) { MachineFunction &MF = *ToBBI.BB->getParent(); @@ -1514,8 +1535,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; - unsigned ExtraPredCost = 0; - unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost); + unsigned ExtraPredCost = TII->getPredicationCost(&*I); + unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); if (NumCycles > 1) ToBBI.ExtraCost += NumCycles-1; ToBBI.ExtraCost2 += ExtraPredCost; @@ -1531,7 +1552,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(MI, Redefs, TRI, true); + UpdatePredRedefs(MI, Redefs, TRI); + + // Some kill flags may not be correct anymore. + if (!DontKill.empty()) + RemoveKills(*MI, DontKill, *TRI); } if (!IgnoreBr) { diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 35295fe..bb0e642 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "regalloc" #include "Spiller.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -21,8 +22,10 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -63,6 +66,7 @@ class InlineSpiller : public Spiller { MachineRegisterInfo &MRI; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; + const MachineBlockFrequencyInfo &MBFI; // Variables that are valid during spill(), but used by multiple methods. LiveRangeEdit *Edit; @@ -146,7 +150,8 @@ public: MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), TII(*mf.getTarget().getInstrInfo()), - TRI(*mf.getTarget().getRegisterInfo()) {} + TRI(*mf.getTarget().getRegisterInfo()), + MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {} void spill(LiveRangeEdit &); @@ -174,10 +179,8 @@ private: bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >, MachineInstr *LoadMI = 0); - void insertReload(LiveInterval &NewLI, SlotIndex, - MachineBasicBlock::iterator MI); - void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - SlotIndex, MachineBasicBlock::iterator MI); + void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI); + void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI); void spillAroundUses(unsigned Reg); void spillAll(); @@ -337,10 +340,12 @@ static raw_ostream &operator<<(raw_ostream &OS, /// propagateSiblingValue - Propagate the value in SVI to dependents if it is /// known. Otherwise remember the dependency for later. /// -/// @param SVI SibValues entry to propagate. +/// @param SVIIter SibValues entry to propagate. 
/// @param VNI Dependent value, or NULL to propagate to all saved dependents. -void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI, +void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, VNInfo *VNI) { + SibValueMap::value_type *SVI = &*SVIIter; + // When VNI is non-NULL, add it to SVI's deps, and only propagate to that. TinyPtrVector<VNInfo*> FirstDeps; if (VNI) { @@ -352,14 +357,12 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI, if (!SVI->second.hasDef()) return; - // Work list of values to propagate. It would be nice to use a SetVector - // here, but then we would be forced to use a SmallSet. - SmallVector<SibValueMap::iterator, 8> WorkList(1, SVI); - SmallPtrSet<VNInfo*, 8> WorkSet; + // Work list of values to propagate. + SmallSetVector<SibValueMap::value_type *, 8> WorkList; + WorkList.insert(SVI); do { SVI = WorkList.pop_back_val(); - WorkSet.erase(SVI->first); TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps; VNI = 0; @@ -450,8 +453,7 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI, continue; // Something changed in DepSVI. Propagate to dependents. - if (WorkSet.insert(DepSVI->first)) - WorkList.push_back(DepSVI); + WorkList.insert(&*DepSVI); DEBUG(dbgs() << " update " << DepSVI->first->id << '@' << DepSVI->first->def << " to:\t" << DepSV); @@ -576,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveRangeQuery SrcQ(SrcLI, VNI->def); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); assert(SrcQ.valueIn() && "Copy from non-existing value"); // Check if this COPY kills its source. SVI->second.KillsSource = SrcQ.isKill(); @@ -881,12 +883,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, } // Allocate a new register for the remat. - LiveInterval &NewLI = Edit->createFrom(Original); - NewLI.markNotSpillable(); + unsigned NewVReg = Edit->createFrom(Original); // Finally we can rematerialize OrigMI before MI. - SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, + SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewVReg, RM, TRI); + (void)DefIdx; DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *LIS.getInstructionFromIndex(DefIdx)); @@ -894,15 +896,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i].second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { - MO.setReg(NewLI.reg); + MO.setReg(NewVReg); MO.setIsKill(); } } - DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); + DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI << '\n'); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI)); - DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); ++NumRemats; return true; } @@ -1005,6 +1004,40 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { return true; } +#if !defined(NDEBUG) +// Dump the range of instructions from B to E with their slot indexes. 
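The propagateSiblingValue() cleanup above replaces a hand-rolled SmallVector-plus-SmallPtrSet worklist with one SmallSetVector, which deduplicates on insert and pops in LIFO order via pop_back_val(). A self-contained model of that container built on std::vector and std::set; llvm::SmallSetVector is the same idea plus small-size optimizations:

    #include <cassert>
    #include <set>
    #include <vector>

    template <typename T> class SetVectorModel {
      std::vector<T> Vec;
      std::set<T> Set;
    public:
      bool insert(const T &V) {
        if (!Set.insert(V).second)
          return false; // already queued: do not add twice
        Vec.push_back(V);
        return true;
      }
      T pop_back_val() {
        T V = Vec.back();
        Vec.pop_back();
        Set.erase(V); // element may be re-queued later
        return V;
      }
      bool empty() const { return Vec.empty(); }
    };

    int main() {
      SetVectorModel<int> WorkList;
      WorkList.insert(1);
      WorkList.insert(2);
      WorkList.insert(1); // duplicate, ignored
      assert(WorkList.pop_back_val() == 2);
      assert(WorkList.pop_back_val() == 1);
      assert(WorkList.empty());
    }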
+static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, + MachineBasicBlock::iterator E, + LiveIntervals const &LIS, + const char *const header, + unsigned VReg =0) { + char NextLine = '\n'; + char SlotIndent = '\t'; + + if (llvm::next(B) == E) { + NextLine = ' '; + SlotIndent = ' '; + } + + dbgs() << '\t' << header << ": " << NextLine; + + for (MachineBasicBlock::iterator I = B; I != E; ++I) { + SlotIndex Idx = LIS.getInstructionIndex(I).getRegSlot(); + + // If a register was passed in and this instruction has it as a + // destination that is marked as an early clobber, print the + // early-clobber slot index. + if (VReg) { + MachineOperand *MO = I->findRegisterDefOperand(VReg); + if (MO && MO->isEarlyClobber()) + Idx = Idx.getRegSlot(true); + } + + dbgs() << SlotIndent << Idx << '\t' << *I; + } +} +#endif + /// foldMemoryOperand - Try folding stack slot references in Ops into their /// instructions. /// @@ -1024,6 +1057,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, bool WasCopy = MI->isCopy(); unsigned ImpReg = 0; + bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::PATCHPOINT || + MI->getOpcode() == TargetOpcode::STACKMAP); + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; @@ -1035,7 +1071,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, continue; } // FIXME: Teach targets to deal with subregs. - if (MO.getSubReg()) + if (!SpillSubRegs && MO.getSubReg()) return false; // We cannot fold a load instruction into a def. if (LoadMI && MO.isDef()) @@ -1045,14 +1081,52 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, FoldOps.push_back(Idx); } + MachineInstrSpan MIS(MI); + MachineInstr *FoldMI = LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) : TII.foldMemoryOperand(MI, FoldOps, StackSlot); if (!FoldMI) return false; + + // Remove LIS for any dead defs in the original MI not in FoldMI. + for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || + MRI.isReserved(Reg)) { + continue; + } + MIBundleOperands::PhysRegInfo RI = + MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); + if (MO->readsReg()) { + assert(RI.Reads && "Cannot fold physreg reader"); + continue; + } + if (RI.Defines) + continue; + // FoldMI does not define this physreg. Remove the LI segment. + assert(MO->isDead() && "Cannot fold physreg def"); + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { + if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = LR->getVNInfoAt(Idx)) + LR->removeValNo(VNI); + } + } + } + LIS.ReplaceMachineInstrInMaps(MI, FoldMI); MI->eraseFromParent(); + // Insert any new instructions other than FoldMI into the LIS maps. + assert(!MIS.empty() && "Unexpected empty span of instructions!"); + for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end(); + MII != End; ++MII) + if (&*MII != FoldMI) + LIS.InsertMachineInstrInMaps(&*MII); + // TII.foldMemoryOperand may have left some implicit operands on the // instruction. Strip them. 
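MachineInstrSpan, used in foldMemoryOperand() above, solves a small but easy-to-get-wrong problem: after TII.foldMemoryOperand() inserts an unknown number of new instructions around MI, the pass must recover exactly that span to register it with LiveIntervals. A toy version of the trick using std::list iterators, which, like machine-instruction iterators, stay valid across insertion (the begin-of-block corner case the real class handles is skipped here):

    #include <cassert>
    #include <iterator>
    #include <list>

    int main() {
      std::list<int> Block = {10, 20, 30};
      auto I = std::next(Block.begin()); // insertion point: before 20
      auto Before = std::prev(I);        // stable neighbor above the span

      Block.insert(I, 21);               // "folding" creates new instructions
      Block.insert(I, 22);

      auto SpanBegin = std::next(Before);       // first newly inserted element
      assert(*SpanBegin == 21);
      assert(std::distance(SpanBegin, I) == 2); // both new elements found
    }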
if (ImpReg) @@ -1064,8 +1138,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, FoldMI->RemoveOperand(i - 1); } - DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t' - << *FoldMI); + DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, + "folded")); + if (!WasCopy) ++NumFolded; else if (Ops.front().second == 0) @@ -1075,36 +1150,35 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, return true; } -/// insertReload - Insert a reload of NewLI.reg before MI. -void InlineSpiller::insertReload(LiveInterval &NewLI, +void InlineSpiller::insertReload(unsigned NewVReg, SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, - MRI.getRegClass(NewLI.reg), &TRI); - --MI; // Point to load instruction. - SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); - // Some (out-of-tree) targets have EC reload instructions. - if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg)) - if (MO->isEarlyClobber()) - LoadIdx = LoadIdx.getRegSlot(true); - DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); - VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); + + MachineInstrSpan MIS(MI); + TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot, + MRI.getRegClass(NewVReg), &TRI); + + LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); + + DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload", + NewVReg)); ++NumReloads; } -/// insertSpill - Insert a spill of NewLI.reg after MI. -void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - SlotIndex Idx, MachineBasicBlock::iterator MI) { +/// insertSpill - Insert a spill of NewVReg after MI. +void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, + MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, - MRI.getRegClass(NewLI.reg), &TRI); - --MI; // Point to store instruction. - SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); - DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); + + MachineInstrSpan MIS(MI); + TII.storeRegToStackSlot(MBB, llvm::next(MI), NewVReg, isKill, StackSlot, + MRI.getRegClass(NewVReg), &TRI); + + LIS.InsertMachineInstrRangeInMaps(llvm::next(MI), MIS.end()); + + DEBUG(dumpMachineInstrRangeWithSlotIndex(llvm::next(MI), MIS.end(), LIS, + "spill")); ++NumSpills; } @@ -1120,18 +1194,14 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. - uint64_t Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->isIndirectDebugValue(); + uint64_t Offset = IsIndirect ? 
MI->getOperand(1).getImm() : 0; const MDNode *MDPtr = MI->getOperand(2).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot, - Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - } else { - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - MI->eraseFromParent(); - } + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(StackSlot).addImm(Offset).addMetadata(MDPtr); continue; } @@ -1184,19 +1254,18 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { if (foldMemoryOperand(Ops)) continue; - // Allocate interval around instruction. + // Create a new virtual register for spill/fill. // FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = Edit->createFrom(Reg); - NewLI.markNotSpillable(); + unsigned NewVReg = Edit->createFrom(Reg); if (RI.Reads) - insertReload(NewLI, Idx, MI); + insertReload(NewVReg, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); - MO.setReg(NewLI.reg); + MO.setReg(NewVReg); if (MO.isUse()) { if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) MO.setIsKill(); @@ -1205,21 +1274,12 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { hasLiveDef = true; } } - DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); + DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); // FIXME: Use a second vreg if instruction has no tied ops. - if (RI.Writes) { + if (RI.Writes) if (hasLiveDef) - insertSpill(NewLI, OldLI, Idx, MI); - else { - // This instruction defines a dead value. We don't need to spill it, - // but do create a live range for the dead value. - VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI)); - } - } - - DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + insertSpill(NewVReg, true, MI); } } @@ -1238,8 +1298,8 @@ void InlineSpiller::spillAll() { assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]), - StackInt->getValNumInfo(0)); + StackInt->MergeSegmentsInAsValue(LIS.getInterval(RegsToSpill[i]), + StackInt->getValNumInfo(0)); DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); // Spill around uses of all RegsToSpill. 
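The DBG_VALUE rewrite above distinguishes a direct form (the value lives in a register) from an indirect form (the value lives at base plus offset), and after spilling it always produces an indirect frame-index form, keeping the old offset only if the old DBG_VALUE was already indirect. A toy model of that decision; the names here are illustrative, not the MachineOperand API:

    #include <cstdint>

    struct DebugValueModel {
      bool IsIndirect;  // true: location is memory at [Base + Offset]
      int Base;         // register number or frame index
      int64_t Offset;   // meaningful only when IsIndirect is true
    };

    static DebugValueModel spillDebugValue(const DebugValueModel &Old,
                                           int StackSlot) {
      DebugValueModel New;
      New.IsIndirect = true;
      New.Base = StackSlot;  // frame index of the spill slot
      New.Offset = Old.IsIndirect ? Old.Offset : 0;
      return New;
    }

    int main() {
      DebugValueModel Direct = {false, 5, 0};
      DebugValueModel New = spillDebugValue(Direct, -2);
      return (New.IsIndirect && New.Offset == 0) ? 0 : 1;
    }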
@@ -1280,8 +1340,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { DEBUG(dbgs() << "Inline spilling " << MRI.getRegClass(edit.getReg())->getName() - << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent() - << "\nFrom original " << LIS.getInterval(Original) << '\n'); + << ':' << edit.getParent() + << "\nFrom original " << PrintReg(Original) << '\n'); assert(edit.getParent().isSpillable() && "Attempting to spill already spilled value."); assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); @@ -1294,5 +1354,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { if (!RegsToSpill.empty()) spillAll(); - Edit->calculateRegClassAndHint(MF, Loops); + Edit->calculateRegClassAndHint(MF, Loops, MBFI); } diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp index a8e711e..427225d 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp @@ -204,11 +204,11 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { // Fixed interference. for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { LiveInterval::iterator &I = RegUnits[i].FixedI; - LiveInterval *LI = RegUnits[i].Fixed; - if (I == LI->end() || I->start >= Stop) + LiveRange *LR = RegUnits[i].Fixed; + if (I == LR->end() || I->start >= Stop) continue; - I = LI->advanceTo(I, Stop); - bool Backup = I == LI->end() || I->start >= Stop; + I = LR->advanceTo(I, Stop); + bool Backup = I == LR->end() || I->start >= Stop; if (Backup) --I; SlotIndex StopI = I->end; diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h index c02fb9a..800f705 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.h +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h @@ -72,7 +72,7 @@ class InterferenceCache { unsigned VirtTag; /// Fixed interference in RegUnit. - LiveInterval *Fixed; + LiveRange *Fixed; /// Iterator pointing into the fixed RegUnit interference. LiveInterval::iterator FixedI; diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index d894f66..c38d4fb 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -485,11 +485,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - Type *IntPtr = TD.getIntPtrType(Context); + Value *Op0 = CI->getArgOperand(0); + Type *IntPtr = TD.getIntPtrType(Op0->getType()); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getArgOperand(0); + Ops[0] = Op0; // Extend the amount to i32. Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context), diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 1a09837..ad2c553 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -62,6 +62,17 @@ static bool getVerboseAsm() { llvm_unreachable("Invalid verbose asm state"); } +void LLVMTargetMachine::initAsmInfo() { + AsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), TargetTriple); + // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, + // and if the old one gets included then MCAsmInfo will be NULL and + // we'll crash later. + // Provide the user with a useful error message about what's wrong. + assert(AsmInfo && "MCAsmInfo not initialized. 
" + "Make sure you include the correct TargetSelect.h" + "and that InitializeAllTargetMCs() is being invoked!"); +} + LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, StringRef CPU, StringRef FS, TargetOptions Options, @@ -69,18 +80,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, CodeGenOpt::Level OL) : TargetMachine(T, Triple, CPU, FS, Options) { CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); - AsmInfo = T.createMCAsmInfo(Triple); - // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, - // and if the old one gets included then MCAsmInfo will be NULL and - // we'll crash later. - // Provide the user with a useful error message about what's wrong. - assert(AsmInfo && "MCAsmInfo not initialized." - "Make sure you include the correct TargetSelect.h" - "and that InitializeAllTargetMCs() is being invoked!"); } void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { - PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createBasicTargetTransformInfoPass(this)); } /// addPassesToX helper drives creation and initialization of TargetPassConfig. @@ -112,7 +115,6 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(), &TM->getTargetLowering()->getObjFileLowering()); PM.add(MMI); - MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref. // Set up a MachineFunction for the rest of CodeGen to work on. PM.add(new MachineFunctionAnalysis(*TM)); @@ -131,7 +133,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassConfig->setInitialized(); - return Context; + return &MMI->getContext(); } bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, @@ -161,6 +163,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, const MCAsmInfo &MAI = *getMCAsmInfo(); const MCRegisterInfo &MRI = *getRegisterInfo(); + const MCInstrInfo &MII = *getInstrInfo(); const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); OwningPtr<MCStreamer> AsmStreamer; @@ -168,19 +171,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, - *getInstrInfo(), - Context->getRegisterInfo(), STI); + MII, MRI, STI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; - MCAsmBackend *MAB = 0; - if (ShowMCEncoding) { - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, - *Context); - MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); - } + if (ShowMCEncoding) + MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), + TargetCPU); MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, getVerboseAsm(), hasMCUseLoc(), @@ -195,9 +194,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, - STI, *Context); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, + *Context); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (MCE == 0 || MAB == 0) return true; @@ -232,7 +231,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// addPassesToEmitMachineCode - Add passes to the specified pass manager to /// get machine code emitted. This uses a JITCodeEmitter object to handle /// actually outputting the machine code and resolving things like the address -/// of functions. This method should returns true if machine code emission is +/// of functions. This method should return true if machine code emission is /// not supported. /// bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, @@ -271,7 +270,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, *Ctx); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), + TargetCPU); if (MCE == 0 || MAB == 0) return true; diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index 8172154..ffe407a 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -212,15 +212,15 @@ LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { /// constructScopeNest void LexicalScopes::constructScopeNest(LexicalScope *Scope) { - assert (Scope && "Unable to calculate scop edominance graph!"); + assert (Scope && "Unable to calculate scope dominance graph!"); SmallVector<LexicalScope *, 4> WorkStack; WorkStack.push_back(Scope); unsigned Counter = 0; while (!WorkStack.empty()) { LexicalScope *WS = WorkStack.back(); - const SmallVector<LexicalScope *, 4> &Children = WS->getChildren(); + const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren(); bool visitedChildren = false; - for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(), + for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) { LexicalScope *ChildScope = *SI; if (!ChildScope->getDFSOut()) { @@ -279,8 +279,8 @@ getMachineBasicBlocks(DebugLoc DL, return; } - SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges(); - for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(), + SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges(); + for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(), E = InsnRanges.end(); I != E; ++I) { InsnRange &R = *I; MBBs.insert(R.first->getParent()); diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 0b117ac..25645e0 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -108,6 +108,7 @@ class LDVImpl; class UserValue { const MDNode *variable; ///< The debug info variable we are part of. unsigned offset; ///< Byte offset into variable. + bool IsIndirect; ///< true if this is a register-indirect+offset value. DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. 
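The LexicalScopes hunks above change interfaces from SmallVector<T, N> to SmallVectorImpl<T>, the LLVM idiom for keeping the inline capacity N out of function signatures. A sketch of why that works, using a stand-in hierarchy rather than the real ADT:

    #include <vector>

    // Stand-ins for SmallVectorImpl<T> and SmallVector<T, N>.
    template <typename T> struct VecImpl { std::vector<T> Data; };
    template <typename T, unsigned N> struct Vec : VecImpl<T> {};

    // Accepts Vec<int, 4>, Vec<int, 16>, ... without caring about N.
    static int sum(const VecImpl<int> &V) {
      int S = 0;
      for (int X : V.Data)
        S += X;
      return S;
    }

    int main() {
      Vec<int, 4> A;
      A.Data = {1, 2, 3};
      Vec<int, 16> B;
      B.Data = {4, 5};
      return sum(A) + sum(B) == 15 ? 0 : 1;
    }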
UserValue *leader; ///< Equivalence class leader. @@ -130,13 +131,15 @@ class UserValue { /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs /// is live. Returns true if any changes were made. - bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); public: /// UserValue - Create a new UserValue. - UserValue(const MDNode *var, unsigned o, DebugLoc L, + UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L, LocMap::Allocator &alloc) - : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc) + : variable(var), offset(o), IsIndirect(i), dl(L), leader(this), + next(0), locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. @@ -217,13 +220,13 @@ public: /// End points where VNI is no longer live are added to Kills. /// @param Idx Starting point for the definition. /// @param LocNo Location number to propagate. - /// @param LI Restrict liveness to where LI has the value VNI. May be null. - /// @param VNI When LI is not null, this is the value to restrict to. + /// @param LR Restrict liveness to where LR has the value VNI. May be null. + /// @param VNI When LR is not null, this is the value to restrict to. /// @param Kills Append end points of VNI's live range to Kills. /// @param LIS Live intervals analysis. /// @param MDT Dominator tree. void extendDef(SlotIndex Idx, unsigned LocNo, - LiveInterval *LI, const VNInfo *VNI, + LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS); @@ -249,7 +252,8 @@ public: /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. - bool splitRegister(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + bool splitRegister(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); /// rewriteLocations - Rewrite virtual register locations according to the /// provided virtual register map. @@ -299,7 +303,8 @@ class LDVImpl { UVMap userVarMap; /// getUserValue - Find or create a UserValue. - UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL); + UserValue *getUserValue(const MDNode *Var, unsigned Offset, + bool IsIndirect, DebugLoc DL); /// lookupVirtReg - Find the EC leader for VirtReg or null. UserValue *lookupVirtReg(unsigned VirtReg); @@ -342,7 +347,7 @@ public: void mapVirtReg(unsigned VirtReg, UserValue *EC); /// splitRegister - Replace all references to OldReg with NewRegs. - void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs); /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. 
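The signature changes running through these LiveDebugVariables hunks swap ArrayRef<LiveInterval*> for ArrayRef<unsigned> plus a LiveIntervals reference, so callers pass register numbers and the callee resolves intervals on demand. A toy sketch of that decoupling, with invented stand-in types:

    #include <map>
    #include <vector>

    struct Interval { bool Empty = false; };

    struct IntervalTable {                  // stand-in for LiveIntervals
      std::map<unsigned, Interval> Map;
      Interval &get(unsigned Reg) { return Map[Reg]; }
    };

    // Before: the caller had to hand over Interval pointers. After,
    // mirroring the diff, it hands over register numbers only:
    static unsigned countNonEmpty(const std::vector<unsigned> &NewRegs,
                                  IntervalTable &LIS) {
      unsigned N = 0;
      for (unsigned Reg : NewRegs)
        if (!LIS.get(Reg).Empty)            // resolved on demand
          ++N;
      return N;
    }

    int main() {
      IntervalTable LIS;
      LIS.get(7).Empty = true;
      return countNonEmpty({5, 7}, LIS) == 1 ? 0 : 1;
    }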
void emitDebugValues(VirtRegMap *VRM); @@ -414,7 +419,7 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { } UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, - DebugLoc DL) { + bool IsIndirect, DebugLoc DL) { UserValue *&Leader = userVarMap[Var]; if (Leader) { UserValue *UV = Leader->getLeader(); @@ -424,7 +429,7 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, return UV; } - UserValue *UV = new UserValue(Var, Offset, DL, allocator); + UserValue *UV = new UserValue(Var, Offset, IsIndirect, DL, allocator); userValues.push_back(UV); Leader = UserValue::merge(Leader, UV); return UV; @@ -445,15 +450,18 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { // DBG_VALUE loc, offset, variable if (MI->getNumOperands() != 3 || - !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) { + !(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) || + !MI->getOperand(2).isMetadata()) { DEBUG(dbgs() << "Can't handle " << *MI); return false; } // Get or create the UserValue for (variable,offset). - unsigned Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->isIndirectDebugValue(); + unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *Var = MI->getOperand(2).getMetadata(); - UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc()); + //here. + UserValue *UV = getUserValue(Var, Offset, IsIndirect, MI->getDebugLoc()); UV->addDef(Idx, MI->getOperand(0)); return true; } @@ -487,7 +495,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { } void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, - LiveInterval *LI, const VNInfo *VNI, + LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS) { @@ -501,15 +509,15 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, // Limit to VNI's live range. bool ToEnd = true; - if (LI && VNI) { - LiveRange *Range = LI->getLiveRangeContaining(Start); - if (!Range || Range->valno != VNI) { + if (LR && VNI) { + LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); + if (!Segment || Segment->valno != VNI) { if (Kills) Kills->push_back(Start); continue; } - if (Range->end < Stop) - Stop = Range->end, ToEnd = false; + if (Segment->end < Stop) + Stop = Segment->end, ToEnd = false; } // There could already be a short def at Start. @@ -661,10 +669,10 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, // For physregs, use the live range of the first regunit as a guide. unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI); - LiveInterval *LI = &LIS.getRegUnit(Unit); - const VNInfo *VNI = LI->getVNInfoAt(Idx); + LiveRange *LR = &LIS.getRegUnit(Unit); + const VNInfo *VNI = LR->getVNInfoAt(Idx); // Don't track copies from physregs, it is too expensive. - extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS); + extendDef(Idx, LocNo, LR, VNI, 0, LIS, MDT, UVS); } // Finally, erase all the undefs. 
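The operand-shape test in handleDebugValue above accepts a DBG_VALUE only when it has three operands, the second being a register (direct form) or an immediate offset (indirect form), and the third being metadata. A model of that predicate, with illustrative names rather than the MachineInstr API:

    struct Op { enum Kind { Reg, Imm, Meta } K; };

    static bool isUsableDebugValue(const Op *Ops, unsigned NumOps) {
      return NumOps == 3 &&
             (Ops[1].K == Op::Reg || Ops[1].K == Op::Imm) &&
             Ops[2].K == Op::Meta;
    }

    int main() {
      Op Direct[3] = {{Op::Reg}, {Op::Reg}, {Op::Meta}};
      Op Indirect[3] = {{Op::Reg}, {Op::Imm}, {Op::Meta}};
      return (isUsableDebugValue(Direct, 3) &&
              isUsableDebugValue(Indirect, 3)) ? 0 : 1;
    }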
@@ -724,7 +732,8 @@ LiveDebugVariables::~LiveDebugVariables() { //===----------------------------------------------------------------------===// bool -UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { +UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals& LIS) { DEBUG({ dbgs() << "Splitting Loc" << OldLocNo << '\t'; print(dbgs(), 0); @@ -733,7 +742,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { LocMap::iterator LocMapI; LocMapI.setMap(locInts); for (unsigned i = 0; i != NewRegs.size(); ++i) { - LiveInterval *LI = NewRegs[i]; + LiveInterval *LI = &LIS.getInterval(NewRegs[i]); if (LI->empty()) continue; @@ -822,7 +831,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { } bool -UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS) { bool DidChange = false; // Split locations referring to OldReg. Iterate backwards so splitLocation can // safely erase unused locations. @@ -831,15 +841,15 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { const MachineOperand *Loc = &locations[LocNo]; if (!Loc->isReg() || Loc->getReg() != OldReg) continue; - DidChange |= splitLocation(LocNo, NewRegs); + DidChange |= splitLocation(LocNo, NewRegs, LIS); } return DidChange; } -void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) { bool DidChange = false; for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) - DidChange |= UV->splitRegister(OldReg, NewRegs); + DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); if (!DidChange) return; @@ -847,11 +857,11 @@ void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { // Map all of the new virtual registers. UserValue *UV = lookupVirtReg(OldReg); for (unsigned i = 0; i != NewRegs.size(); ++i) - mapVirtReg(NewRegs[i]->reg, UV); + mapVirtReg(NewRegs[i], UV); } void LiveDebugVariables:: -splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) { if (pImpl) static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs); } @@ -921,19 +931,12 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, MachineOperand &Loc = locations[LocNo]; ++NumInsertedDebugValues; - // Frame index locations may require a target callback. - if (Loc.isFI()) { - MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(), - Loc.getIndex(), offset, variable, - findDebugLoc()); - if (MI) { - MBB->insert(I, MI); - return; - } - } - // This is not a frame index, or the target is happy with a standard FI. 
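The LiveDebugVariables forwarding seen above uses the pImpl idiom: the pass holds an opaque pointer and casts it to a hidden implementation class defined in the .cpp file. A minimal sketch of the pattern with invented names:

    #include <iostream>

    class Widget {
      void *pImpl;  // opaque here; the real type is file-local
    public:
      Widget();
      ~Widget();
      void doWork();
    };

    namespace {
    class WidgetImpl {  // visible only in this translation unit
    public:
      void doWork() { std::cout << "working\n"; }
    };
    } // end anonymous namespace

    Widget::Widget() : pImpl(new WidgetImpl()) {}
    Widget::~Widget() { delete static_cast<WidgetImpl *>(pImpl); }
    void Widget::doWork() {
      if (pImpl)
        static_cast<WidgetImpl *>(pImpl)->doWork();
    }

    int main() {
      Widget W;
      W.doWork();
    }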
- BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) - .addOperand(Loc).addImm(offset).addMetadata(variable); + if (Loc.isReg()) + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, Loc.getReg(), offset, variable); + else + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(Loc).addImm(offset).addMetadata(variable); } void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, @@ -992,4 +995,3 @@ void LiveDebugVariables::dump() { static_cast<LDVImpl*>(pImpl)->print(dbgs()); } #endif - diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h index 3ce3c39..58a3f0f 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h @@ -27,6 +27,7 @@ namespace llvm { class LiveInterval; +class LiveIntervals; class VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { @@ -47,7 +48,8 @@ public: /// splitRegister - Move any user variables in OldReg to the live ranges in /// NewRegs where they are live. Mark the values as unavailable where no new /// register is live. - void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes /// that happened during register allocation. diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index dccd847..2b8feb8 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -9,12 +9,12 @@ // // This file implements the LiveRange and LiveInterval classes. Given some // numbering of each the machine instructions an interval [i, j) is said to be a -// live interval for register v if there is no instruction with number j' > j +// live range for register v if there is no instruction with number j' >= j // such that v is live at j' and there is no instruction with number i' < i such -// that v is live at i'. In this implementation intervals can have holes, -// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each -// individual range is represented as an instance of LiveRange, and the whole -// interval is represented as an instance of LiveInterval. +// that v is live at i'. In this implementation ranges can have holes, +// i.e. a range might look like [1,20), [50,65), [1000,1001). Each +// individual segment is represented as an instance of LiveRange::Segment, +// and the whole range is represented as an instance of LiveRange. // //===----------------------------------------------------------------------===// @@ -31,14 +31,14 @@ #include <algorithm> using namespace llvm; -LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { +LiveRange::iterator LiveRange::find(SlotIndex Pos) { // This algorithm is basically std::upper_bound. // Unfortunately, std::upper_bound cannot be used with mixed types until we // adopt C++0x. Many libraries can do it, but not all. 
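The renamed LiveRange::find above is a hand-rolled upper_bound, written out manually because mixed-type std::upper_bound was not portable before C++11: it returns the first segment whose end is greater than Pos. A standalone transcription of the same search:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Seg { int start, end; };  // half-open [start, end)

    static const Seg *findSeg(const std::vector<Seg> &Segs, int Pos) {
      if (Segs.empty() || Pos >= Segs.back().end)
        return nullptr;              // nothing ends after Pos
      const Seg *I = &Segs[0];
      std::size_t Len = Segs.size();
      do {
        std::size_t Mid = Len >> 1;
        if (Pos < I[Mid].end)
          Len = Mid;                 // answer is in [I, I + Mid]
        else {
          I += Mid + 1;              // answer is strictly after I[Mid]
          Len -= Mid + 1;
        }
      } while (Len);
      return I;
    }

    int main() {
      std::vector<Seg> S = {{1, 4}, {6, 9}, {12, 20}};
      assert(findSeg(S, 5)->start == 6);  // first segment ending after 5
      assert(findSeg(S, 25) == nullptr);
    }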
if (empty() || Pos >= endIndex()) return end(); iterator I = begin(); - size_t Len = ranges.size(); + size_t Len = size(); do { size_t Mid = Len >> 1; if (Pos < I[Mid].end) @@ -49,13 +49,13 @@ LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { return I; } -VNInfo *LiveInterval::createDeadDef(SlotIndex Def, - VNInfo::Allocator &VNInfoAllocator) { +VNInfo *LiveRange::createDeadDef(SlotIndex Def, + VNInfo::Allocator &VNInfoAllocator) { assert(!Def.isDead() && "Cannot define a value at the dead slot"); iterator I = find(Def); if (I == end()) { VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI)); + segments.push_back(Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } if (SlotIndex::isSameInstr(Def, I->start)) { @@ -73,11 +73,11 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, } assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI)); + segments.insert(I, Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } -// overlaps - Return true if the intersection of the two live intervals is +// overlaps - Return true if the intersection of the two live ranges is // not empty. // // An example for overlaps(): @@ -86,7 +86,7 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, // 4: B = ... // 8: C = A + B ;; last use of A // -// The live intervals should look like: +// The live ranges should look like: // // A = [3, 11) // B = [7, x) @@ -95,9 +95,9 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, // A->overlaps(C) should return false since we want to be able to join // A and C. // -bool LiveInterval::overlapsFrom(const LiveInterval& other, - const_iterator StartPos) const { - assert(!empty() && "empty interval"); +bool LiveRange::overlapsFrom(const LiveRange& other, + const_iterator StartPos) const { + assert(!empty() && "empty range"); const_iterator i = begin(); const_iterator ie = end(); const_iterator j = StartPos; @@ -108,13 +108,13 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, if (i->start < j->start) { i = std::upper_bound(i, ie, j->start); - if (i != ranges.begin()) --i; + if (i != begin()) --i; } else if (j->start < i->start) { ++StartPos; if (StartPos != other.end() && StartPos->start <= i->start) { assert(StartPos < other.end() && i < end()); j = std::upper_bound(j, je, i->start); - if (j != other.ranges.begin()) --j; + if (j != other.begin()) --j; } } else { return true; @@ -136,10 +136,9 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, return false; } -bool LiveInterval::overlaps(const LiveInterval &Other, - const CoalescerPair &CP, - const SlotIndexes &Indexes) const { - assert(!empty() && "empty interval"); +bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP, + const SlotIndexes &Indexes) const { + assert(!empty() && "empty range"); if (Other.empty()) return false; @@ -178,9 +177,9 @@ bool LiveInterval::overlaps(const LiveInterval &Other, } } -/// overlaps - Return true if the live interval overlaps a range specified +/// overlaps - Return true if the live range overlaps an interval specified /// by [Start, End). 
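The overlap test above is, at its core, the classic two-pointer sweep over two sorted lists of half-open segments; the real overlapsFrom additionally uses upper_bound to leapfrog long runs of non-overlapping segments. A reduced standalone sketch of the sweep:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Seg { int start, end; };  // half-open [start, end)

    static bool overlaps(const std::vector<Seg> &A,
                         const std::vector<Seg> &B) {
      std::size_t i = 0, j = 0;
      while (i != A.size() && j != B.size()) {
        if (A[i].start < B[j].end && B[j].start < A[i].end)
          return true;               // the segments intersect
        // Advance whichever segment ends first; it cannot overlap
        // anything later in the other list.
        if (A[i].end <= B[j].end)
          ++i;
        else
          ++j;
      }
      return false;
    }

    int main() {
      assert(overlaps({{3, 11}}, {{7, 20}}));    // A and B from the comment
      assert(!overlaps({{3, 11}}, {{11, 20}}));  // touching is not overlap
    }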
-bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { +bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const { assert(Start < End && "Invalid range"); const_iterator I = std::lower_bound(begin(), end(), End); return I != begin() && (--I)->end > Start; @@ -190,7 +189,7 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { /// ValNo is dead, remove it. If it is the largest value number, just nuke it /// (and any other deleted values neighboring it), otherwise mark it as ~1U so /// it can be nuked later. -void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { +void LiveRange::markValNoForDeletion(VNInfo *ValNo) { if (ValNo->id == getNumValNums()-1) { do { valnos.pop_back(); @@ -202,137 +201,135 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { /// RenumberValues - Renumber all values in order of appearance and delete the /// remaining unused values. -void LiveInterval::RenumberValues(LiveIntervals &lis) { +void LiveRange::RenumberValues() { SmallPtrSet<VNInfo*, 8> Seen; valnos.clear(); for (const_iterator I = begin(), E = end(); I != E; ++I) { VNInfo *VNI = I->valno; if (!Seen.insert(VNI)) continue; - assert(!VNI->isUnused() && "Unused valno used by live range"); + assert(!VNI->isUnused() && "Unused valno used by live segment"); VNI->id = (unsigned)valnos.size(); valnos.push_back(VNI); } } -/// extendIntervalEndTo - This method is used when we want to extend the range -/// specified by I to end at the specified endpoint. To do this, we should -/// merge and eliminate all ranges that this will overlap with. The iterator is -/// not invalidated. -void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { - assert(I != ranges.end() && "Not a valid interval!"); +/// This method is used when we want to extend the segment specified by I to end +/// at the specified endpoint. To do this, we should merge and eliminate all +/// segments that this will overlap with. The iterator is not invalidated. +void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) { + assert(I != end() && "Not a valid segment!"); VNInfo *ValNo = I->valno; - // Search for the first interval that we can't merge with. - Ranges::iterator MergeTo = llvm::next(I); - for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) { + // Search for the first segment that we can't merge with. + iterator MergeTo = llvm::next(I); + for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) { assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); } - // If NewEnd was in the middle of an interval, make sure to get its endpoint. + // If NewEnd was in the middle of a segment, make sure to get its endpoint. I->end = std::max(NewEnd, prior(MergeTo)->end); - // If the newly formed range now touches the range after it and if they have - // the same value number, merge the two ranges into one range. - if (MergeTo != ranges.end() && MergeTo->start <= I->end && + // If the newly formed segment now touches the segment after it and if they + // have the same value number, merge the two segments into one segment. + if (MergeTo != end() && MergeTo->start <= I->end && MergeTo->valno == ValNo) { I->end = MergeTo->end; ++MergeTo; } - // Erase any dead ranges. - ranges.erase(llvm::next(I), MergeTo); + // Erase any dead segments. + segments.erase(llvm::next(I), MergeTo); } -/// extendIntervalStartTo - This method is used when we want to extend the range -/// specified by I to start at the specified endpoint. 
To do this, we should -/// merge and eliminate all ranges that this will overlap with. -LiveInterval::Ranges::iterator -LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) { - assert(I != ranges.end() && "Not a valid interval!"); +/// This method is used when we want to extend the segment specified by I to +/// start at the specified endpoint. To do this, we should merge and eliminate +/// all segments that this will overlap with. +LiveRange::iterator +LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) { + assert(I != end() && "Not a valid segment!"); VNInfo *ValNo = I->valno; - // Search for the first interval that we can't merge with. - Ranges::iterator MergeTo = I; + // Search for the first segment that we can't merge with. + iterator MergeTo = I; do { - if (MergeTo == ranges.begin()) { + if (MergeTo == begin()) { I->start = NewStart; - ranges.erase(MergeTo, I); + segments.erase(MergeTo, I); return I; } assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); --MergeTo; } while (NewStart <= MergeTo->start); - // If we start in the middle of another interval, just delete a range and - // extend that interval. + // If we start in the middle of another segment, just delete a range and + // extend that segment. if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { MergeTo->end = I->end; } else { - // Otherwise, extend the interval right after. + // Otherwise, extend the segment right after. ++MergeTo; MergeTo->start = NewStart; MergeTo->end = I->end; } - ranges.erase(llvm::next(MergeTo), llvm::next(I)); + segments.erase(llvm::next(MergeTo), llvm::next(I)); return MergeTo; } -LiveInterval::iterator -LiveInterval::addRangeFrom(LiveRange LR, iterator From) { - SlotIndex Start = LR.start, End = LR.end; - iterator it = std::upper_bound(From, ranges.end(), Start); +LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) { + SlotIndex Start = S.start, End = S.end; + iterator it = std::upper_bound(From, end(), Start); - // If the inserted interval starts in the middle or right at the end of - // another interval, just extend that interval to contain the range of LR. - if (it != ranges.begin()) { + // If the inserted segment starts in the middle or right at the end of + // another segment, just extend that segment to contain the segment of S. + if (it != begin()) { iterator B = prior(it); - if (LR.valno == B->valno) { + if (S.valno == B->valno) { if (B->start <= Start && B->end >= Start) { - extendIntervalEndTo(B, End); + extendSegmentEndTo(B, End); return B; } } else { - // Check to make sure that we are not overlapping two live ranges with + // Check to make sure that we are not overlapping two live segments with // different valno's. assert(B->end <= Start && - "Cannot overlap two LiveRanges with differing ValID's" + "Cannot overlap two segments with differing ValID's" " (did you def the same reg twice in a MachineInstr?)"); } } - // Otherwise, if this range ends in the middle of, or right next to, another - // interval, merge it into that interval. - if (it != ranges.end()) { - if (LR.valno == it->valno) { + // Otherwise, if this segment ends in the middle of, or right next to, another + // segment, merge it into that segment. 
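The addSegmentFrom logic above prefers growing an existing neighbor over inserting: when the new segment touches or overlaps a segment carrying the same value number, that segment is extended in place, keeping the list sorted and maximally coalesced. A simplified sketch that grows at most one neighbor (the real code also swallows every later segment the extension covers):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Seg { int start, end, val; };

    static void addSegment(std::vector<Seg> &Segs, Seg S) {
      for (auto &Old : Segs) {
        if (Old.val == S.val && Old.start <= S.end && S.start <= Old.end) {
          Old.start = std::min(Old.start, S.start);  // grow in place
          Old.end = std::max(Old.end, S.end);
          return;
        }
      }
      // No mergeable neighbor: insert keeping the list sorted by start.
      auto I = Segs.begin();
      while (I != Segs.end() && I->start < S.start)
        ++I;
      Segs.insert(I, S);
    }

    int main() {
      std::vector<Seg> S;
      addSegment(S, {1, 4, 0});
      addSegment(S, {4, 7, 0});  // touches and shares value 0: merged
      assert(S.size() == 1 && S[0].end == 7);
    }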
+ if (it != end()) { + if (S.valno == it->valno) { if (it->start <= End) { - it = extendIntervalStartTo(it, Start); + it = extendSegmentStartTo(it, Start); - // If LR is a complete superset of an interval, we may need to grow its + // If S is a complete superset of a segment, we may need to grow its // endpoint as well. if (End > it->end) - extendIntervalEndTo(it, End); + extendSegmentEndTo(it, End); return it; } } else { - // Check to make sure that we are not overlapping two live ranges with + // Check to make sure that we are not overlapping two live segments with // different valno's. assert(it->start >= End && - "Cannot overlap two LiveRanges with differing ValID's"); + "Cannot overlap two segments with differing ValID's"); } } - // Otherwise, this is just a new range that doesn't interact with anything. + // Otherwise, this is just a new segment that doesn't interact with anything. // Insert it. - return ranges.insert(it, LR); + return segments.insert(it, S); } -/// extendInBlock - If this interval is live before Kill in the basic +/// extendInBlock - If this range is live before Kill in the basic /// block that starts at StartIdx, extend it to be live up to Kill and return /// the value. If there is no live range before Kill, return NULL. -VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { +VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { if (empty()) return 0; iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot()); @@ -342,20 +339,21 @@ VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { if (I->end <= StartIdx) return 0; if (I->end < Kill) - extendIntervalEndTo(I, Kill); + extendSegmentEndTo(I, Kill); return I->valno; } -/// removeRange - Remove the specified range from this interval. Note that -/// the range must be in a single LiveRange in its entirety. -void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, - bool RemoveDeadValNo) { - // Find the LiveRange containing this span. - Ranges::iterator I = find(Start); - assert(I != ranges.end() && "Range is not in interval!"); - assert(I->containsRange(Start, End) && "Range is not entirely in interval!"); +/// Remove the specified segment from this range. Note that the segment must +/// be in a single Segment in its entirety. +void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, + bool RemoveDeadValNo) { + // Find the Segment containing this span. + iterator I = find(Start); + assert(I != end() && "Segment is not in range!"); + assert(I->containsInterval(Start, End) + && "Segment is not entirely in range!"); - // If the span we are removing is at the start of the LiveRange, adjust it. + // If the span we are removing is at the start of the Segment, adjust it. VNInfo *ValNo = I->valno; if (I->start == Start) { if (I->end == End) { @@ -373,54 +371,50 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, } } - ranges.erase(I); // Removed the whole LiveRange. + segments.erase(I); // Removed the whole Segment. } else I->start = End; return; } - // Otherwise if the span we are removing is at the end of the LiveRange, + // Otherwise if the span we are removing is at the end of the Segment, // adjust the other way. if (I->end == End) { I->end = Start; return; } - // Otherwise, we are splitting the LiveRange into two pieces. + // Otherwise, we are splitting the Segment into two pieces. SlotIndex OldEnd = I->end; - I->end = Start; // Trim the old interval. + I->end = Start; // Trim the old segment. // Insert the new one. 
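The removeSegment body above handles three cases: trim at the front, trim at the back, or split one segment into two when the removed span lies strictly inside it. A standalone sketch of the same case analysis:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Seg { int start, end; };  // half-open [start, end)

    static void removeSpan(std::vector<Seg> &Segs, std::size_t I,
                           int Start, int End) {
      Seg &S = Segs[I];
      assert(S.start <= Start && End <= S.end && "span must lie in one segment");
      if (S.start == Start && S.end == End) {  // remove it entirely
        Segs.erase(Segs.begin() + I);
        return;
      }
      if (S.start == Start) { S.start = End; return; }  // trim the front
      if (S.end == End) { S.end = Start; return; }      // trim the back
      int OldEnd = S.end;                // split into two pieces
      S.end = Start;                     // trim the old segment...
      Segs.insert(Segs.begin() + I + 1, Seg{End, OldEnd});  // ...insert the new
    }

    int main() {
      std::vector<Seg> S = {{1, 10}};
      removeSpan(S, 0, 4, 6);            // punch a hole in the middle
      assert(S.size() == 2 && S[0].end == 4 && S[1].start == 6);
    }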
- ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo)); + segments.insert(llvm::next(I), Segment(End, OldEnd, ValNo)); } -/// removeValNo - Remove all the ranges defined by the specified value#. +/// removeValNo - Remove all the segments defined by the specified value#. /// Also remove the value# from value# list. -void LiveInterval::removeValNo(VNInfo *ValNo) { +void LiveRange::removeValNo(VNInfo *ValNo) { if (empty()) return; - Ranges::iterator I = ranges.end(); - Ranges::iterator E = ranges.begin(); + iterator I = end(); + iterator E = begin(); do { --I; if (I->valno == ValNo) - ranges.erase(I); + segments.erase(I); } while (I != E); // Now that ValNo is dead, remove it. markValNoForDeletion(ValNo); } -/// join - Join two live intervals (this, and other) together. This applies -/// mappings to the value numbers in the LHS/RHS intervals as specified. If -/// the intervals are not joinable, this aborts. -void LiveInterval::join(LiveInterval &Other, - const int *LHSValNoAssignments, - const int *RHSValNoAssignments, - SmallVector<VNInfo*, 16> &NewVNInfo, - MachineRegisterInfo *MRI) { +void LiveRange::join(LiveRange &Other, + const int *LHSValNoAssignments, + const int *RHSValNoAssignments, + SmallVectorImpl<VNInfo *> &NewVNInfo) { verify(); - // Determine if any of our live range values are mapped. This is uncommon, so - // we want to avoid the interval scan if not. + // Determine if any of our values are mapped. This is uncommon, so we want + // to avoid the range scan if not. bool MustMapCurValNos = false; unsigned NumVals = getNumValNums(); unsigned NumNewVals = NewVNInfo.size(); @@ -433,8 +427,7 @@ void LiveInterval::join(LiveInterval &Other, } } - // If we have to apply a mapping to our base interval assignment, rewrite it - // now. + // If we have to apply a mapping to our base range assignment, rewrite it now. if (MustMapCurValNos && !empty()) { // Map the first live range. @@ -445,12 +438,12 @@ void LiveInterval::join(LiveInterval &Other, assert(nextValNo != 0 && "Huh?"); // If this live range has the same value # as its immediate predecessor, - // and if they are neighbors, remove one LiveRange. This happens when we + // and if they are neighbors, remove one Segment. This happens when we // have [0,4:0)[4,7:1) and map 0/1 onto the same value #. if (OutIt->valno == nextValNo && OutIt->end == I->start) { OutIt->end = I->end; } else { - // Didn't merge. Move OutIt to the next interval, + // Didn't merge. Move OutIt to the next segment, ++OutIt; OutIt->valno = nextValNo; if (OutIt != I) { @@ -459,9 +452,9 @@ void LiveInterval::join(LiveInterval &Other, } } } - // If we merge some live ranges, chop off the end. + // If we merge some segments, chop off the end. ++OutIt; - ranges.erase(OutIt, end()); + segments.erase(OutIt, end()); } // Rewrite Other values before changing the VNInfo ids. @@ -472,7 +465,7 @@ void LiveInterval::join(LiveInterval &Other, I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]]; // Update val# info. Renumber them and make sure they all belong to this - // LiveInterval now. Also remove dead val#'s. + // LiveRange now. Also remove dead val#'s. unsigned NumValNos = 0; for (unsigned i = 0; i < NumNewVals; ++i) { VNInfo *VNI = NewVNInfo[i]; @@ -487,31 +480,31 @@ void LiveInterval::join(LiveInterval &Other, if (NumNewVals < NumVals) valnos.resize(NumNewVals); // shrinkify - // Okay, now insert the RHS live ranges into the LHS. + // Okay, now insert the RHS live segments into the LHS. 
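The removeValNo loop above walks from end() backwards so that erasing an element never disturbs the positions still to be visited (vector erasure only invalidates iterators at and after the erase point). A standalone sketch of the same erase-while-iterating-backwards pattern:

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> V = {1, 2, 1, 3, 1};
      for (auto I = V.end(); I != V.begin();) {
        --I;
        if (*I == 1)
          I = V.erase(I);  // erase returns the next element; loop steps back
      }
      assert(V.size() == 2 && V[0] == 2 && V[1] == 3);
    }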
LiveRangeUpdater Updater(this); for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) Updater.add(*I); } -/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live -/// interval as the specified value number. The LiveRanges in RHS are -/// allowed to overlap with LiveRanges in the current interval, but only if -/// the overlapping LiveRanges have the specified value number. -void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, - VNInfo *LHSValNo) { +/// Merge all of the segments in RHS into this live range as the specified +/// value number. The segments in RHS are allowed to overlap with segments in +/// the current range, but only if the overlapping segments have the +/// specified value number. +void LiveRange::MergeSegmentsInAsValue(const LiveRange &RHS, + VNInfo *LHSValNo) { LiveRangeUpdater Updater(this); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) Updater.add(I->start, I->end, LHSValNo); } -/// MergeValueInAsValue - Merge all of the live ranges of a specific val# -/// in RHS into this live interval as the specified value number. -/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the -/// current interval, it will replace the value numbers of the overlaped -/// live ranges with the specified value number. -void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, - const VNInfo *RHSValNo, - VNInfo *LHSValNo) { +/// MergeValueInAsValue - Merge all of the live segments of a specific val# +/// in RHS into this live range as the specified value number. +/// The segments in RHS are allowed to overlap with segments in the +/// current range, it will replace the value numbers of the overlaped +/// segments with the specified value number. +void LiveRange::MergeValueInAsValue(const LiveRange &RHS, + const VNInfo *RHSValNo, + VNInfo *LHSValNo) { LiveRangeUpdater Updater(this); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) if (I->valno == RHSValNo) @@ -520,9 +513,9 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, /// MergeValueNumberInto - This method is called when two value nubmers /// are found to be equivalent. This eliminates V1, replacing all -/// LiveRanges with the V1 value number with the V2 value number. This can +/// segments with the V1 value number with the V2 value number. This can /// cause merging of V1/V2 values numbers and compaction of the value space. -VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { +VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { assert(V1 != V2 && "Identical value#'s are always equivalent!"); // This code actually merges the (numerically) larger value number into the @@ -536,37 +529,37 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { std::swap(V1, V2); } - // Merge V1 live ranges into V2. + // Merge V1 segments into V2. for (iterator I = begin(); I != end(); ) { - iterator LR = I++; - if (LR->valno != V1) continue; // Not a V1 LiveRange. + iterator S = I++; + if (S->valno != V1) continue; // Not a V1 Segment. // Okay, we found a V1 live range. If it had a previous, touching, V2 live // range, extend it. - if (LR != begin()) { - iterator Prev = LR-1; - if (Prev->valno == V2 && Prev->end == LR->start) { - Prev->end = LR->end; + if (S != begin()) { + iterator Prev = S-1; + if (Prev->valno == V2 && Prev->end == S->start) { + Prev->end = S->end; // Erase this live-range. 
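The Merge* helpers above all funnel additions through LiveRangeUpdater, whose contract is that segments arrive in non-decreasing start order so they can be coalesced on the fly without re-sorting. A toy batcher with the same contract, not the real updater (which also handles out-of-order flushes and the spill area described further down):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Seg { int start, end, val; };

    class Batcher {
      std::vector<Seg> Out;
    public:
      void add(Seg S) {  // segments must arrive by ascending start
        assert(Out.empty() || Out.back().start <= S.start);
        if (!Out.empty() && Out.back().val == S.val &&
            Out.back().end >= S.start) {
          Out.back().end = std::max(Out.back().end, S.end);  // coalesce
          return;
        }
        Out.push_back(S);
      }
      std::size_t size() const { return Out.size(); }
    };

    int main() {
      Batcher B;
      B.add({1, 4, 0});
      B.add({4, 7, 0});   // touches with the same value: merged
      B.add({9, 12, 1});
      assert(B.size() == 2);
    }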
- ranges.erase(LR); + segments.erase(S); I = Prev+1; - LR = Prev; + S = Prev; } } // Okay, now we have a V1 or V2 live range that is maximally merged forward. // Ensure that it is a V2 live-range. - LR->valno = V2; + S->valno = V2; - // If we can merge it into later V2 live ranges, do so now. We ignore any - // following V1 live ranges, as they will be merged in subsequent iterations + // If we can merge it into later V2 segments, do so now. We ignore any + // following V1 segments, as they will be merged in subsequent iterations // of the loop. if (I != end()) { - if (I->start == LR->end && I->valno == V2) { - LR->end = I->end; - ranges.erase(I); - I = LR+1; + if (I->start == S->end && I->valno == V2) { + S->end = I->end; + segments.erase(I); + I = S+1; } } } @@ -584,22 +577,21 @@ unsigned LiveInterval::getSize() const { return Sum; } -raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { - return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; +raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) { + return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ")"; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void LiveRange::dump() const { +void LiveRange::Segment::dump() const { dbgs() << *this << "\n"; } #endif -void LiveInterval::print(raw_ostream &OS) const { +void LiveRange::print(raw_ostream &OS) const { if (empty()) OS << "EMPTY"; else { - for (LiveInterval::Ranges::const_iterator I = ranges.begin(), - E = ranges.end(); I != E; ++I) { + for (const_iterator I = begin(), E = end(); I != E; ++I) { OS << *I; assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo"); } @@ -625,19 +617,29 @@ void LiveInterval::print(raw_ostream &OS) const { } } +void LiveInterval::print(raw_ostream &OS) const { + OS << PrintReg(reg) << ' '; + super::print(OS); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void LiveRange::dump() const { + dbgs() << *this << "\n"; +} + void LiveInterval::dump() const { dbgs() << *this << "\n"; } #endif #ifndef NDEBUG -void LiveInterval::verify() const { +void LiveRange::verify() const { for (const_iterator I = begin(), E = end(); I != E; ++I) { assert(I->start.isValid()); assert(I->end.isValid()); assert(I->start < I->end); assert(I->valno != 0); + assert(I->valno->id < valnos.size()); assert(I->valno == valnos[I->valno->id]); if (llvm::next(I) != E) { assert(I->end <= llvm::next(I)->start); @@ -649,10 +651,6 @@ void LiveInterval::verify() const { #endif -void LiveRange::print(raw_ostream &os) const { - os << *this; -} - //===----------------------------------------------------------------------===// // LiveRangeUpdater class //===----------------------------------------------------------------------===// @@ -665,11 +663,11 @@ void LiveRange::print(raw_ostream &os) const { // // Otherwise, segments are kept in three separate areas: // -// 1. [begin; WriteI) at the front of LI. -// 2. [ReadI; end) at the back of LI. +// 1. [begin; WriteI) at the front of LR. +// 2. [ReadI; end) at the back of LR. // 3. Spills. // -// - LI.begin() <= WriteI <= ReadI <= LI.end(). +// - LR.begin() <= WriteI <= ReadI <= LR.end(). // - Segments in all three areas are fully ordered and coalesced. // - Segments in area 1 precede and can't coalesce with segments in area 2. // - Segments in Spills precede and can't coalesce with segments in area 2. 
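The verify() routine above checks the structural invariants every live range must maintain: segments are non-empty, sorted, disjoint, and two segments with the same value number never merely touch (they would have been coalesced). A standalone version of those checks:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Seg { int start, end, val; };

    static void verifySegments(const std::vector<Seg> &Segs) {
      for (std::size_t i = 0; i != Segs.size(); ++i) {
        assert(Segs[i].start < Segs[i].end);             // non-empty
        if (i + 1 != Segs.size()) {
          assert(Segs[i].end <= Segs[i + 1].start);      // sorted, disjoint
          if (Segs[i].end == Segs[i + 1].start)
            assert(Segs[i].val != Segs[i + 1].val);      // touching differ
        }
      }
    }

    int main() {
      verifySegments({{1, 4, 0}, {4, 7, 1}, {9, 12, 1}});
    }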
@@ -684,23 +682,23 @@ void LiveRange::print(raw_ostream &os) const { void LiveRangeUpdater::print(raw_ostream &OS) const { if (!isDirty()) { - if (LI) - OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n'; + if (LR) + OS << "Clean updater: " << *LR << '\n'; else OS << "Null updater.\n"; return; } - assert(LI && "Can't have null LI in dirty updater."); - OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI) + assert(LR && "Can't have null LR in dirty updater."); + OS << " updater with gap = " << (ReadI - WriteI) << ", last start = " << LastStart << ":\n Area 1:"; - for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I) + for (LiveRange::const_iterator I = LR->begin(); I != WriteI; ++I) OS << ' ' << *I; OS << "\n Spills:"; for (unsigned I = 0, E = Spills.size(); I != E; ++I) OS << ' ' << Spills[I]; OS << "\n Area 2:"; - for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I) + for (LiveRange::const_iterator I = ReadI, E = LR->end(); I != E; ++I) OS << ' ' << *I; OS << '\n'; } @@ -711,8 +709,9 @@ void LiveRangeUpdater::dump() const } // Determine if A and B should be coalesced. -static inline bool coalescable(const LiveRange &A, const LiveRange &B) { - assert(A.start <= B.start && "Unordered live ranges."); +static inline bool coalescable(const LiveRange::Segment &A, + const LiveRange::Segment &B) { + assert(A.start <= B.start && "Unordered live segments."); if (A.end == B.start) return A.valno == B.valno; if (A.end < B.start) @@ -721,8 +720,8 @@ static inline bool coalescable(const LiveRange &A, const LiveRange &B) { return true; } -void LiveRangeUpdater::add(LiveRange Seg) { - assert(LI && "Cannot add to a null destination"); +void LiveRangeUpdater::add(LiveRange::Segment Seg) { + assert(LR && "Cannot add to a null destination"); // Flush the state if Start moves backwards. if (!LastStart.isValid() || LastStart > Seg.start) { @@ -730,21 +729,21 @@ void LiveRangeUpdater::add(LiveRange Seg) { flush(); // This brings us to an uninitialized state. Reinitialize. assert(Spills.empty() && "Leftover spilled segments"); - WriteI = ReadI = LI->begin(); + WriteI = ReadI = LR->begin(); } // Remember start for next time. LastStart = Seg.start; // Advance ReadI until it ends after Seg.start. - LiveInterval::iterator E = LI->end(); + LiveRange::iterator E = LR->end(); if (ReadI != E && ReadI->end <= Seg.start) { // First try to close the gap between WriteI and ReadI with spills. if (ReadI != WriteI) mergeSpills(); // Then advance ReadI. if (ReadI == WriteI) - ReadI = WriteI = LI->find(Seg.start); + ReadI = WriteI = LR->find(Seg.start); else while (ReadI != E && ReadI->end <= Seg.start) *WriteI++ = *ReadI++; @@ -777,7 +776,7 @@ void LiveRangeUpdater::add(LiveRange Seg) { } // Try coalescing Seg into WriteI[-1]. - if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) { + if (WriteI != LR->begin() && coalescable(WriteI[-1], Seg)) { WriteI[-1].end = std::max(WriteI[-1].end, Seg.end); return; } @@ -788,10 +787,10 @@ void LiveRangeUpdater::add(LiveRange Seg) { return; } - // Finally, append to LI or Spills. + // Finally, append to LR or Spills. if (WriteI == E) { - LI->ranges.push_back(Seg); - WriteI = ReadI = LI->ranges.end(); + LR->segments.push_back(Seg); + WriteI = ReadI = LR->end(); } else Spills.push_back(Seg); } @@ -802,10 +801,10 @@ void LiveRangeUpdater::mergeSpills() { // Perform a backwards merge of Spills and [SpillI;WriteI). 
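The coalescable() predicate above encodes when two ordered segments may merge: touching segments merge only with equal values, a real gap never merges, and overlapping segments always merge (the caller resolves the value conflict). A standalone transcription:

    #include <cassert>

    struct Seg { int start, end, val; };

    static bool coalescable(const Seg &A, const Seg &B) {
      assert(A.start <= B.start && "unordered segments");
      if (A.end == B.start)   // touching: only with equal values
        return A.val == B.val;
      if (A.end < B.start)    // a real gap: never
        return false;
      return true;            // overlapping: always
    }

    int main() {
      assert(coalescable({1, 4, 0}, {4, 7, 0}));
      assert(!coalescable({1, 4, 0}, {4, 7, 1}));
      assert(coalescable({1, 5, 0}, {4, 7, 1}));
    }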
size_t GapSize = ReadI - WriteI; size_t NumMoved = std::min(Spills.size(), GapSize); - LiveInterval::iterator Src = WriteI; - LiveInterval::iterator Dst = Src + NumMoved; - LiveInterval::iterator SpillSrc = Spills.end(); - LiveInterval::iterator B = LI->begin(); + LiveRange::iterator Src = WriteI; + LiveRange::iterator Dst = Src + NumMoved; + LiveRange::iterator SpillSrc = Spills.end(); + LiveRange::iterator B = LR->begin(); // This is the new WriteI position after merging spills. WriteI = Dst; @@ -827,12 +826,12 @@ void LiveRangeUpdater::flush() { // Clear the dirty state. LastStart = SlotIndex(); - assert(LI && "Cannot add to a null destination"); + assert(LR && "Cannot add to a null destination"); // Nothing to merge? if (Spills.empty()) { - LI->ranges.erase(WriteI, ReadI); - LI->verify(); + LR->segments.erase(WriteI, ReadI); + LR->verify(); return; } @@ -840,17 +839,17 @@ void LiveRangeUpdater::flush() { size_t GapSize = ReadI - WriteI; if (GapSize < Spills.size()) { // The gap is too small. Make some room. - size_t WritePos = WriteI - LI->begin(); - LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange()); + size_t WritePos = WriteI - LR->begin(); + LR->segments.insert(ReadI, Spills.size() - GapSize, LiveRange::Segment()); // This also invalidated ReadI, but it is recomputed below. - WriteI = LI->ranges.begin() + WritePos; + WriteI = LR->begin() + WritePos; } else { // Shrink the gap if necessary. - LI->ranges.erase(WriteI + Spills.size(), ReadI); + LR->segments.erase(WriteI + Spills.size(), ReadI); } ReadI = WriteI + Spills.size(); mergeSpills(); - LI->verify(); + LR->verify(); } unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { @@ -909,8 +908,16 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); ++RI; - // DBG_VALUE instructions should have been eliminated earlier. - LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI)); + // DBG_VALUE instructions don't have slot indexes, so get the index of the + // instruction before them. + // Normally, DBG_VALUE instructions are removed before this function is + // called, but it is not a requirement. + SlotIndex Idx; + if (MI->isDebugValue()) + Idx = LIS.getSlotIndexes()->getIndexBefore(MI); + else + Idx = LIS.getInstructionIndex(MI); + LiveQueryResult LRQ = LI.Query(Idx); const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); // In the case of an <undef> use that isn't tied to any def, VNI will be // NULL. If the use is tied to a def, VNI will be the defined value. @@ -927,11 +934,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], if (unsigned eq = EqClass[I->valno->id]) { assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && "New intervals should be empty"); - LIV[eq]->ranges.push_back(*I); + LIV[eq]->segments.push_back(*I); } else *J++ = *I; } - LI.ranges.erase(J, E); + LI.segments.erase(J, E); // Transfer VNInfos to their new owners and renumber them. 
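The mergeSpills routine above performs a backwards merge: two sorted runs are merged from their back ends into the free gap that sits between them, so no element is overwritten before it has been read. A sketch of the technique on a single buffer with an explicit gap:

    #include <cassert>
    #include <vector>

    int main() {
      // Layout: area1 | gap | area2, modeled in one vector. Merge the
      // "spills" into the gap from the right.
      std::vector<int> Buf = {1, 5, /*gap*/ 0, 0, /*area2*/ 9};
      std::vector<int> Spills = {3, 7};

      int Src = 1;                          // last element of area 1
      int Dst = Src + (int)Spills.size();   // new end of the merged front
      int SpillSrc = (int)Spills.size() - 1;
      while (SpillSrc >= 0) {
        if (Src >= 0 && Buf[Src] > Spills[SpillSrc])
          Buf[Dst--] = Buf[Src--];          // take the larger tail element
        else
          Buf[Dst--] = Spills[SpillSrc--];
      }
      // Leftover area-1 elements are already in place.
      assert((Buf == std::vector<int>{1, 3, 5, 7, 9}));
    }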
unsigned j = 0, e = LI.getNumValNums(); diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index f1b8394..e1c3217 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Value.h" +#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -51,6 +52,14 @@ INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) +#ifndef NDEBUG +static cl::opt<bool> EnablePrecomputePhysRegs( + "precompute-phys-liveness", cl::Hidden, + cl::desc("Eagerly compute live intervals for all physreg units.")); +#else +static bool EnablePrecomputePhysRegs = false; +#endif // NDEBUG + void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); @@ -86,15 +95,15 @@ void LiveIntervals::releaseMemory() { RegMaskBits.clear(); RegMaskBlocks.clear(); - for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i) - delete RegUnitIntervals[i]; - RegUnitIntervals.clear(); + for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i) + delete RegUnitRanges[i]; + RegUnitRanges.clear(); // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. VNInfoAllocator.Reset(); } -/// runOnMachineFunction - Register allocate the whole function +/// runOnMachineFunction - calculates LiveIntervals /// bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { MF = &fn; @@ -115,6 +124,12 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { computeRegMasks(); computeLiveInRegUnits(); + if (EnablePrecomputePhysRegs) { + // For stress testing, precompute live ranges of all physical register + // units, including reserved registers. + for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + getRegUnit(i); + } DEBUG(dump()); return true; } @@ -124,15 +139,15 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << "********** INTERVALS **********\n"; // Dump the regunits. - for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i) - if (LiveInterval *LI = RegUnitIntervals[i]) - OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n'; + for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i) + if (LiveRange *LR = RegUnitRanges[i]) + OS << PrintRegUnit(i, TRI) << ' ' << *LR << '\n'; // Dump the virtregs. for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (hasInterval(Reg)) - OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n'; + OS << getInterval(Reg) << '\n'; } OS << "RegMasks:"; @@ -155,16 +170,17 @@ void LiveIntervals::dumpInstrs() const { #endif LiveInterval* LiveIntervals::createInterval(unsigned reg) { - float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F; + float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? + llvm::huge_valf : 0.0F; return new LiveInterval(reg, Weight); } /// computeVirtRegInterval - Compute the live interval of a virtual register, /// based on defs and uses. 
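The precompute-phys-liveness option added above follows a common LLVM idiom for debug-only knobs: the cl::opt exists only in asserts builds, and release builds see a constant false the compiler can fold away. A minimal sketch of the idiom with a hypothetical flag name:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

#ifndef NDEBUG
// Registered only in asserts builds; cl::Hidden keeps it out of -help.
static cl::opt<bool> EnableStressMode(
    "stress-mode", cl::Hidden,
    cl::desc("Hypothetical debug-only toggle."));
#else
// Release builds get a compile-time constant instead of option parsing.
static bool EnableStressMode = false;
#endif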
-void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) { +void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); - assert(LI->empty() && "Should only compute empty intervals."); + assert(LI.empty() && "Should only compute empty intervals."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); LRCalc->createDeadDefs(LI); LRCalc->extendToUses(LI); @@ -175,9 +191,7 @@ void LiveIntervals::computeVirtRegs() { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; - LiveInterval *LI = createInterval(Reg); - VirtRegIntervals[Reg] = LI; - computeVirtRegInterval(LI); + createAndComputeVirtRegInterval(Reg); } } @@ -214,12 +228,10 @@ void LiveIntervals::computeRegMasks() { // interference. // -/// computeRegUnitInterval - Compute the live interval of a register unit, based -/// on the uses and defs of aliasing registers. The interval should be empty, +/// computeRegUnitInterval - Compute the live range of a register unit, based +/// on the uses and defs of aliasing registers. The range should be empty, /// or contain only dead phi-defs from ABI blocks. -void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { - unsigned Unit = LI->reg; - +void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) { assert(LRCalc && "LRCalc not initialized."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); @@ -229,25 +241,21 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { // idempotent. It is very rare for a register unit to have multiple roots, so // uniquing super-registers is probably not worthwhile. for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { - unsigned Root = *Roots; - if (!MRI->reg_empty(Root)) - LRCalc->createDeadDefs(LI, Root); - for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { + for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true); + Supers.isValid(); ++Supers) { if (!MRI->reg_empty(*Supers)) - LRCalc->createDeadDefs(LI, *Supers); + LRCalc->createDeadDefs(LR, *Supers); } } - // Now extend LI to reach all uses. + // Now extend LR to reach all uses. // Ignore uses of reserved registers. We only track defs of those. for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { - unsigned Root = *Roots; - if (!MRI->isReserved(Root) && !MRI->reg_empty(Root)) - LRCalc->extendToUses(LI, Root); - for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { + for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true); + Supers.isValid(); ++Supers) { unsigned Reg = *Supers; if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg)) - LRCalc->extendToUses(LI, Reg); + LRCalc->extendToUses(LR, Reg); } } } @@ -258,11 +266,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { /// without a corresponding def when entering the entry block or a landing pad. /// void LiveIntervals::computeLiveInRegUnits() { - RegUnitIntervals.resize(TRI->getNumRegUnits()); + RegUnitRanges.resize(TRI->getNumRegUnits()); DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n"); - // Keep track of the intervals allocated. - SmallVector<LiveInterval*, 8> NewIntvs; + // Keep track of the live range sets allocated. + SmallVector<unsigned, 8> NewRanges; // Check all basic blocks for live-ins. 
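The loop that follows allocates the per-unit LiveRange lazily and records which units are new, so that only those are fed to computeRegUnitRange afterwards. The allocate-on-first-touch step in isolation (toy LiveRange; ownership is released elsewhere, as releaseMemory does above):

#include <vector>

struct LiveRange { /* segment list elided for the sketch */ };

// Return the range for Unit, creating it on first touch and remembering
// the unit number for the later computeRegUnitRange pass.
static LiveRange *getOrCreateUnitRange(unsigned Unit,
                                       std::vector<LiveRange *> &Ranges,
                                       std::vector<unsigned> &NewUnits) {
  if (!Ranges[Unit]) {
    Ranges[Unit] = new LiveRange();
    NewUnits.push_back(Unit);
  }
  return Ranges[Unit];
}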
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); @@ -280,23 +288,25 @@ void LiveIntervals::computeLiveInRegUnits() { LIE = MBB->livein_end(); LII != LIE; ++LII) { for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; - LiveInterval *Intv = RegUnitIntervals[Unit]; - if (!Intv) { - Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF); - NewIntvs.push_back(Intv); + LiveRange *LR = RegUnitRanges[Unit]; + if (!LR) { + LR = RegUnitRanges[Unit] = new LiveRange(); + NewRanges.push_back(Unit); } - VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator()); + VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator()); (void)VNI; DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id); } } DEBUG(dbgs() << '\n'); } - DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n"); + DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n"); - // Compute the 'normal' part of the intervals. - for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i) - computeRegUnitInterval(NewIntvs[i]); + // Compute the 'normal' part of the ranges. + for (unsigned i = 0, e = NewRanges.size(); i != e; ++i) { + unsigned Unit = NewRanges[i]; + computeRegUnitRange(*RegUnitRanges[Unit], Unit); + } } @@ -320,7 +330,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); - LiveRangeQuery LRQ(*li, Idx); + LiveQueryResult LRQ = li->Query(Idx); VNInfo *VNI = LRQ.valueIn(); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is @@ -339,14 +349,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, WorkList.push_back(std::make_pair(Idx, VNI)); } - // Create a new live interval with only minimal live segments per def. - LiveInterval NewLI(li->reg, 0); + // Create new live ranges with only minimal live segments per def. + LiveRange NewLR; for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI)); + NewLR.addSegment(LiveRange::Segment(VNI->def, VNI->def.getDeadSlot(), VNI)); } // Keep track of the PHIs that are in use. @@ -361,7 +371,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SlotIndex BlockStart = getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. - if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { + if (VNInfo *ExtVNI = NewLR.extendInBlock(BlockStart, Idx)) { (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? @@ -382,7 +392,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // VNI is live-in to MBB. DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); - NewLI.addRange(LiveRange(BlockStart, Idx, VNI)); + NewLR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI)); // Make sure VNI is live-out from the predecessors. 
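The predecessor scan that follows is the heart of shrinkToUses' backward walk: liveness is pushed from each use into predecessor blocks until known liveness is reached. A toy rendition over an adjacency-list CFG; the real pass additionally stops at defining blocks and records exact slot indexes:

#include <vector>

// Collect every block the value must flow through to reach useBlock.
// preds[b] lists the predecessors of block b. Purely illustrative.
static void extendBackwards(int useBlock,
                            const std::vector<std::vector<int>> &preds,
                            std::vector<bool> &reached) {
  std::vector<int> work(1, useBlock);
  while (!work.empty()) {
    int b = work.back();
    work.pop_back();
    for (int p : preds[b]) {
      if (reached[p])
        continue;         // already propagated through this predecessor
      reached[p] = true;  // the value must be live-out of p
      work.push_back(p);
    }
  }
}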
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), @@ -403,14 +413,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, VNInfo *VNI = *I; if (VNI->isUnused()) continue; - LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); - assert(LII != NewLI.end() && "Missing live range for PHI"); - if (LII->end != VNI->def.getDeadSlot()) + LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def); + assert(LRI != NewLR.end() && "Missing segment for PHI"); + if (LRI->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. VNI->markUnused(); - NewLI.removeRange(*LII); + NewLR.removeSegment(LRI->start, LRI->end); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); CanSeparate = true; } else { @@ -425,23 +435,23 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, } } - // Move the trimmed ranges back. - li->ranges.swap(NewLI.ranges); + // Move the trimmed segments back. + li->segments.swap(NewLR.segments); DEBUG(dbgs() << "Shrunk: " << *li << '\n'); return CanSeparate; } -void LiveIntervals::extendToIndices(LiveInterval *LI, +void LiveIntervals::extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices) { assert(LRCalc && "LRCalc not initialized."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); for (unsigned i = 0, e = Indices.size(); i != e; ++i) - LRCalc->extend(LI, Indices[i]); + LRCalc->extend(LR, Indices[i]); } void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, SmallVectorImpl<SlotIndex> *EndPoints) { - LiveRangeQuery LRQ(*LI, Kill); + LiveQueryResult LRQ = LI->Query(Kill); VNInfo *VNI = LRQ.valueOut(); if (!VNI) return; @@ -452,13 +462,13 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, // If VNI isn't live out from KillMBB, the value is trivially pruned. if (LRQ.endPoint() < MBBEnd) { - LI->removeRange(Kill, LRQ.endPoint()); + LI->removeSegment(Kill, LRQ.endPoint()); if (EndPoints) EndPoints->push_back(LRQ.endPoint()); return; } // VNI is live out of KillMBB. - LI->removeRange(Kill, MBBEnd); + LI->removeSegment(Kill, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); // Find all blocks that are reachable from KillMBB without leaving VNI's live @@ -476,23 +486,23 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, // Check if VNI is live in to MBB. tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); - LiveRangeQuery LRQ(*LI, MBBStart); + LiveQueryResult LRQ = LI->Query(MBBStart); if (LRQ.valueIn() != VNI) { - // This block isn't part of the VNI live range. Prune the search. + // This block isn't part of the VNI segment. Prune the search. I.skipChildren(); continue; } // Prune the search if VNI is killed in MBB. if (LRQ.endPoint() < MBBEnd) { - LI->removeRange(MBBStart, LRQ.endPoint()); + LI->removeSegment(MBBStart, LRQ.endPoint()); if (EndPoints) EndPoints->push_back(LRQ.endPoint()); I.skipChildren(); continue; } // VNI is live through MBB. - LI->removeRange(MBBStart, MBBEnd); + LI->removeSegment(MBBStart, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); ++I; } @@ -505,7 +515,7 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // Keep track of regunit ranges. 
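The removeSegment calls in pruneValue above all take half-open [start;end) slices, so deleting [Kill;MBBEnd) from a segment can leave a prefix, a suffix, both, or nothing. The interval arithmetic on a single toy segment, as a self-contained sketch:

#include <algorithm>
#include <vector>

struct Seg { int start, end; };  // half-open [start, end)

// Remove [Kill, End) from S; whatever survives on either side is returned.
static std::vector<Seg> removeSlice(Seg S, int Kill, int End) {
  std::vector<Seg> Out;
  if (S.start < Kill)
    Out.push_back({S.start, std::min(S.end, Kill)});  // surviving prefix
  if (End < S.end)
    Out.push_back({std::max(S.start, End), S.end});   // surviving suffix
  return Out;
}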
- SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU; + SmallVector<std::pair<LiveRange*, LiveRange::iterator>, 8> RU; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); @@ -520,13 +530,14 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { RU.clear(); for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid(); ++Units) { - LiveInterval *RUInt = &getRegUnit(*Units); - if (RUInt->empty()) + LiveRange &RURanges = getRegUnit(*Units); + if (RURanges.empty()) continue; - RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end))); + RU.push_back(std::make_pair(&RURanges, RURanges.find(LI->begin()->end))); } - // Every instruction that kills Reg corresponds to a live range end point. + // Every instruction that kills Reg corresponds to a segment range end + // point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; ++RI) { // A block index indicates an MBB edge. @@ -536,7 +547,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { if (!MI) continue; - // Check if any of the reguints are live beyond the end of RI. That could + // Check if any of the regunits are live beyond the end of RI. That could // happen when a physreg is defined as a copy of a virtreg: // // %EAX = COPY %vreg5 @@ -546,12 +557,12 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. bool CancelKill = false; for (unsigned u = 0, e = RU.size(); u != e; ++u) { - LiveInterval *RInt = RU[u].first; - LiveInterval::iterator &I = RU[u].second; - if (I == RInt->end()) + LiveRange &RRanges = *RU[u].first; + LiveRange::iterator &I = RU[u].second; + if (I == RRanges.end()) continue; - I = RInt->advanceTo(I, RI->end); - if (I == RInt->end() || I->start >= RI->end) + I = RRanges.advanceTo(I, RI->end); + if (I == RRanges.end() || I->start >= RI->end) continue; // I is overlapping RI. CancelKill = true; @@ -609,35 +620,23 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { } float -LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { - // Limit the loop depth ridiculousness. - if (loopDepth > 200) - loopDepth = 200; - - // The loop depth is used to roughly estimate the number of times the - // instruction is executed. Something like 10^d is simple, but will quickly - // overflow a float. This expression behaves like 10^d for small d, but is - // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of - // headroom before overflow. - // By the way, powf() might be unavailable here. For consistency, - // We may take pow(double,double). 
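The getSpillWeight hunk continuing below drops the loop-depth heuristic (roughly 10^depth, tempered against float overflow) in favor of real block frequencies scaled by the entry block's frequency. The new computation, sketched with plain integers standing in for BlockFrequency (names illustrative):

#include <cstdint>

// weight = (isDef + isUse) * freq / entryFreq, per the replacement hunk.
static float spillWeight(bool isDef, bool isUse, uint64_t freq,
                         uint64_t entryFreq) {
  const float scale = 1.0f / static_cast<float>(entryFreq);
  return (isDef + isUse) * (static_cast<float>(freq) * scale);
}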
- float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); - - return (isDef + isUse) * lc; +LiveIntervals::getSpillWeight(bool isDef, bool isUse, BlockFrequency freq) { + const float Scale = 1.0f / BlockFrequency::getEntryFrequency(); + return (isDef + isUse) * (freq.getFrequency() * Scale); } -LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, - MachineInstr* startInst) { - LiveInterval& Interval = getOrCreateInterval(reg); +LiveRange::Segment +LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr* startInst) { + LiveInterval& Interval = createEmptyInterval(reg); VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getVNInfoAllocator()); - LiveRange LR( + LiveRange::Segment S( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); - Interval.addRange(LR); + Interval.addSegment(S); - return LR; + return S; } @@ -712,7 +711,7 @@ private: const TargetRegisterInfo& TRI; SlotIndex OldIdx; SlotIndex NewIdx; - SmallPtrSet<LiveInterval*, 8> Updated; + SmallPtrSet<LiveRange*, 8> Updated; bool UpdateFlags; public: @@ -726,7 +725,7 @@ public: // physregs, even those that aren't needed for regalloc, in order to update // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill // flags, and postRA passes will use a live register utility instead. - LiveInterval *getRegUnitLI(unsigned Unit) { + LiveRange *getRegUnitLI(unsigned Unit) { if (UpdateFlags) return &LIS.getRegUnit(Unit); return LIS.getCachedRegUnit(Unit); @@ -751,15 +750,16 @@ public: if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) { - updateRange(LIS.getInterval(Reg)); + LiveInterval &LI = LIS.getInterval(Reg); + updateRange(LI, Reg); continue; } // For physregs, only update the regunits that actually have a // precomputed live range. for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) - if (LiveInterval *LI = getRegUnitLI(*Units)) - updateRange(*LI); + if (LiveRange *LR = getRegUnitLI(*Units)) + updateRange(*LR, *Units); } if (hasRegMask) updateRegMaskSlots(); @@ -768,26 +768,26 @@ public: private: /// Update a single live range, assuming an instruction has been moved from /// OldIdx to NewIdx. - void updateRange(LiveInterval &LI) { - if (!Updated.insert(&LI)) + void updateRange(LiveRange &LR, unsigned Reg) { + if (!Updated.insert(&LR)) return; DEBUG({ dbgs() << " "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - dbgs() << PrintReg(LI.reg); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + dbgs() << PrintReg(Reg); else - dbgs() << PrintRegUnit(LI.reg, &TRI); - dbgs() << ":\t" << LI << '\n'; + dbgs() << PrintRegUnit(Reg, &TRI); + dbgs() << ":\t" << LR << '\n'; }); if (SlotIndex::isEarlierInstr(OldIdx, NewIdx)) - handleMoveDown(LI); + handleMoveDown(LR); else - handleMoveUp(LI); - DEBUG(dbgs() << " -->\t" << LI << '\n'); - LI.verify(); + handleMoveUp(LR, Reg); + DEBUG(dbgs() << " -->\t" << LR << '\n'); + LR.verify(); } - /// Update LI to reflect an instruction has been moved downwards from OldIdx + /// Update LR to reflect an instruction has been moved downwards from OldIdx /// to NewIdx. /// /// 1. Live def at OldIdx: @@ -801,17 +801,17 @@ private: /// Move def to NewIdx, possibly across another live value. /// /// 4. Def at OldIdx AND at NewIdx: - /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx. + /// Remove segment [OldIdx;NewIdx) and value defined at OldIdx. /// (Happens when bundling multiple defs together). /// /// 5. 
Value read at OldIdx, killed before NewIdx: /// Extend kill to NewIdx. /// - void handleMoveDown(LiveInterval &LI) { + void handleMoveDown(LiveRange &LR) { // First look for a kill at OldIdx. - LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); - LiveInterval::iterator E = LI.end(); - // Is LI even live at OldIdx? + LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); + LiveRange::iterator E = LR.end(); + // Is LR even live at OldIdx? if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) return; @@ -828,7 +828,7 @@ private: for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO) if (MO->isReg() && MO->isUse()) MO->setIsKill(false); - // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by + // Adjust I->end to reach NewIdx. This may temporarily make LR invalid by // overlapping ranges. Case 5 above. I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); // If this was a kill, there may also be a def. Otherwise we're done. @@ -857,24 +857,25 @@ private: assert((I->end == OldIdx.getDeadSlot() || SlotIndex::isSameInstr(I->end, NewIdx)) && "Cannot move def below kill"); - LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot()); + LiveRange::iterator NewI = LR.advanceTo(I, NewIdx.getRegSlot()); if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) { // There is an existing def at NewIdx, case 4 above. The def at OldIdx is // coalesced into that value. assert(NewI->valno != DefVNI && "Multiple defs of value?"); - LI.removeValNo(DefVNI); + LR.removeValNo(DefVNI); return; } // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx. - // If the def at OldIdx was dead, we allow it to be moved across other LI + // If the def at OldIdx was dead, we allow it to be moved across other LR // values. The new range should be placed immediately before NewI, move any // intermediate ranges up. assert(NewI != I && "Inconsistent iterators"); std::copy(llvm::next(I), NewI, I); - *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + *llvm::prior(NewI) + = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } - /// Update LI to reflect an instruction has been moved upwards from OldIdx + /// Update LR to reflect an instruction has been moved upwards from OldIdx /// to NewIdx. /// /// 1. Live def at OldIdx: @@ -894,11 +895,11 @@ private: /// Hoist kill to NewIdx, then scan for last kill between NewIdx and /// OldIdx. /// - void handleMoveUp(LiveInterval &LI) { + void handleMoveUp(LiveRange &LR, unsigned Reg) { // First look for a kill at OldIdx. - LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); - LiveInterval::iterator E = LI.end(); - // Is LI even live at OldIdx? + LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); + LiveRange::iterator E = LR.end(); + // Is LR even live at OldIdx? if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) return; @@ -915,7 +916,7 @@ private: if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { // No def, search for the new kill. // This can never be an early clobber kill since there is no def. - llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot(); + llvm::prior(I)->end = findLastUseBefore(Reg).getRegSlot(); return; } } @@ -927,18 +928,18 @@ private: DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); // Check for an existing def at NewIdx. 
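Both move handlers reposition a def by sliding a run of segments one slot: handleMoveDown above ends with std::copy, and handleMoveUp just below finishes with std::copy_backward. The sliding idiom in isolation, over a plain vector:

#include <algorithm>
#include <cstddef>
#include <vector>

// Move v[from] to index to (with to < from), shifting v[to, from) right by
// one. This is the std::copy_backward step minus live-range bookkeeping.
static void slideDown(std::vector<int> &v, std::size_t from, std::size_t to) {
  int moved = v[from];
  std::copy_backward(v.begin() + to, v.begin() + from, v.begin() + from + 1);
  v[to] = moved;
}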
- LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot()); + LiveRange::iterator NewI = LR.find(NewIdx.getRegSlot()); if (SlotIndex::isSameInstr(NewI->start, NewIdx)) { assert(NewI->valno != DefVNI && "Same value defined more than once?"); // There is an existing def at NewIdx. if (I->end.isDead()) { // Case 3: Remove the dead def at OldIdx. - LI.removeValNo(DefVNI); + LR.removeValNo(DefVNI); return; } // Case 4: Replace def at NewIdx with live def at OldIdx. I->start = DefVNI->def; - LI.removeValNo(NewI->valno); + LR.removeValNo(NewI->valno); return; } @@ -949,10 +950,10 @@ private: return; } - // DefVNI is a dead def. It may have been moved across other values in LI, + // DefVNI is a dead def. It may have been moved across other values in LR, // so move I up to NewI. Slide [NewI;I) down one position. std::copy_backward(NewI, I, llvm::next(I)); - *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } void updateRegMaskSlots() { @@ -1075,8 +1076,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (MOI->isReg() && TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && !hasInterval(MOI->getReg())) { - LiveInterval &LI = getOrCreateInterval(MOI->getReg()); - computeVirtRegInterval(&LI); + createAndComputeVirtRegInterval(MOI->getReg()); } } } @@ -1123,9 +1123,9 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (LII != LI.begin()) prevStart = llvm::prior(LII)->start; - // FIXME: This could be more efficient if there was a removeRange - // method that returned an iterator. - LI.removeRange(*LII, true); + // FIXME: This could be more efficient if there was a + // removeSegment method that returned an iterator. + LI.removeSegment(*LII, true); if (prevStart.isValid()) LII = LI.find(prevStart); else @@ -1144,13 +1144,14 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (!lastUseIdx.isValid()) { VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); - LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI); - LII = LI.addRange(LR); + LiveRange::Segment S(instrIdx.getRegSlot(), + instrIdx.getDeadSlot(), VNI); + LII = LI.addSegment(S); } else if (LII->start != instrIdx.getRegSlot()) { VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); - LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI); - LII = LI.addRange(LR); + LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI); + LII = LI.addSegment(S); } if (MO.getSubReg() && !MO.isUndef()) diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp index dede490..ae086bc 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -36,11 +36,11 @@ void LiveRangeCalc::reset(const MachineFunction *mf, } -void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { +void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { assert(MRI && Indexes && "call reset() first"); // Visit all def operands. If the same instruction has multiple defs of Reg, - // LI->createDeadDef() will deduplicate. + // LR.createDeadDef() will deduplicate. for (MachineRegisterInfo::def_iterator I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) { const MachineInstr *MI = &*I; @@ -54,13 +54,13 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { Idx = Indexes->getInstructionIndex(MI) .getRegSlot(I.getOperand().isEarlyClobber()); - // Create the def in LI. 
This may find an existing def. - LI->createDeadDef(Idx, *Alloc); + // Create the def in LR. This may find an existing def. + LR.createDeadDef(Idx, *Alloc); } } -void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { +void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { assert(MRI && Indexes && "call reset() first"); // Visit all operands that read Reg. This may include partial defs. @@ -99,7 +99,7 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { Idx = Idx.getRegSlot(true); } } - extend(LI, Idx, Reg); + extend(LR, Idx, Reg); } } @@ -125,17 +125,14 @@ void LiveRangeCalc::updateLiveIns() { assert(Seen.test(MBB->getNumber())); LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0); } - Updater.setDest(I->LI); + Updater.setDest(&I->LR); Updater.add(Start, End, I->Value); } LiveIn.clear(); } -void LiveRangeCalc::extend(LiveInterval *LI, - SlotIndex Kill, - unsigned PhysReg) { - assert(LI && "Missing live range"); +void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg) { assert(Kill.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); @@ -144,14 +141,14 @@ void LiveRangeCalc::extend(LiveInterval *LI, assert(KillMBB && "No MBB at Kill"); // Is there a def in the same MBB we can extend? - if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) + if (LR.extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) return; // Find the single reaching def, or determine if Kill is jointly dominated by // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - if (findReachingDefs(LI, KillMBB, Kill, PhysReg)) + if (findReachingDefs(LR, *KillMBB, Kill, PhysReg)) return; // When there were multiple different values, we may need new PHIs. @@ -170,13 +167,11 @@ void LiveRangeCalc::calculateValues() { } -bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg) { - unsigned KillMBBNum = KillMBB->getNumber(); +bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, + SlotIndex Kill, unsigned PhysReg) { + unsigned KillMBBNum = KillMBB.getNumber(); - // Block numbers where LI should be live-in. + // Block numbers where LR should be live-in. SmallVector<unsigned, 16> WorkList(1, KillMBBNum); // Remember if we have seen more than one value. @@ -203,7 +198,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, #endif for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { + PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock *Pred = *PI; // Is this a known live-out block? @@ -221,7 +216,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, // First time we see Pred. Try to determine the live-out value, but set // it as null if Pred is live-through with an unknown value. - VNInfo *VNI = LI->extendInBlock(Start, End); + VNInfo *VNI = LR.extendInBlock(Start, End); setLiveOutValue(Pred, VNI); if (VNI) { if (TheVNI && TheVNI != VNI) @@ -231,7 +226,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, } // No, we need a live-in value for Pred as well - if (Pred != KillMBB) + if (Pred != &KillMBB) WorkList.push_back(Pred->getNumber()); else // Loopback to KillMBB, so value is really live through. 
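findReachingDefs above is in essence a breadth-first walk over predecessors: a block with a known live-out value ends the search on that path, every other block joins the worklist as needing a live-in value, and the result is unique if only one value was ever encountered. A toy rendition with integer value numbers, where -1 means no known live-out value:

#include <algorithm>
#include <cstddef>
#include <vector>

// Returns true when one value reaches killBlk on all paths; needLiveIn
// collects the blocks that must carry that value live-in.
static bool uniqueReachingDef(int killBlk,
                              const std::vector<std::vector<int>> &preds,
                              const std::vector<int> &liveOutVal,
                              std::vector<int> &needLiveIn) {
  int theVal = -1;
  bool unique = true;
  needLiveIn.assign(1, killBlk);
  for (std::size_t i = 0; i != needLiveIn.size(); ++i) {  // worklist grows
    for (int p : preds[needLiveIn[i]]) {
      if (liveOutVal[p] != -1) {  // live-out value of p is already known
        if (theVal != -1 && theVal != liveOutVal[p])
          unique = false;         // two different values reach the kill
        theVal = liveOutVal[p];
        continue;
      }
      if (std::find(needLiveIn.begin(), needLiveIn.end(), p) ==
          needLiveIn.end())
        needLiveIn.push_back(p);  // p needs a live-in value as well
    }
  }
  return unique && theVal != -1;
}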
@@ -248,9 +243,9 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, // If a unique reaching def was found, blit in the live ranges immediately. if (UniqueVNI) { - LiveRangeUpdater Updater(LI); - for (SmallVectorImpl<unsigned>::const_iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { + LiveRangeUpdater Updater(&LR); + for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(), + E = WorkList.end(); I != E; ++I) { SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(*I); // Trim the live range in KillMBB. @@ -270,8 +265,8 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { MachineBasicBlock *MBB = MF->getBlockNumbered(*I); - addLiveInBlock(LI, DomTree->getNode(MBB)); - if (MBB == KillMBB) + addLiveInBlock(LR, DomTree->getNode(MBB)); + if (MBB == &KillMBB) LiveIn.back().Kill = Kill; } @@ -348,16 +343,17 @@ void LiveRangeCalc::updateSSA() { assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); - VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); + LiveRange &LR = I->LR; + VNInfo *VNI = LR.getNextValue(Start, *Alloc); I->Value = VNI; // This block is done, we know the final value. I->DomNode = 0; // Add liveness since updateLiveIns now skips this node. if (I->Kill.isValid()) - I->LI->addRange(LiveRange(Start, I->Kill, VNI)); + LR.addSegment(LiveInterval::Segment(Start, I->Kill, VNI)); else { - I->LI->addRange(LiveRange(Start, End, VNI)); + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h index 57cab7b..a3a3fbb 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -75,9 +75,9 @@ class LiveRangeCalc { /// LiveInBlock - Information about a basic block where a live range is known /// to be live-in, but the value has not yet been determined. struct LiveInBlock { - // LI - The live range that is live-in to this block. The algorithms can + // The live range set that is live-in to this block. The algorithms can // handle multiple non-overlapping live ranges simultaneously. - LiveInterval *LI; + LiveRange &LR; // DomNode - Dominator tree node for the block. // Cleared when the final value has been determined and LI has been updated. @@ -91,8 +91,8 @@ class LiveRangeCalc { // Live-in value filled in by updateSSA once it is known. VNInfo *Value; - LiveInBlock(LiveInterval *li, MachineDomTreeNode *node, SlotIndex kill) - : LI(li), DomNode(node), Kill(kill), Value(0) {} + LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) + : LR(LR), DomNode(node), Kill(kill), Value(0) {} }; /// LiveIn - Work list of blocks where the live-in value has yet to be @@ -111,10 +111,8 @@ class LiveRangeCalc { /// are added to the LiveIn array, and the function returns false. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - bool findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg); + bool findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, + SlotIndex Kill, unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. 
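In the updateSSA hunk above, a block that receives a PHI-def gets a value defined at the block start whose segment runs to the recorded kill, or to the block end when the value is live through (in which case it also becomes the block's live-out). The segment construction in isolation, with -1 standing in for an invalid SlotIndex:

struct Segment { int start, end, valno; };

// Build the segment added for a fresh PHI-def; kill < 0 means live-through.
static Segment phiDefSegment(int blockStart, int blockEnd, int kill,
                             int newValno) {
  int end = (kill >= 0) ? kill : blockEnd;
  return Segment{blockStart, end, newValno};
}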
@@ -146,10 +144,6 @@ public: MachineDominatorTree*, VNInfo::Allocator*); - /// calculate - Calculate the live range of a virtual register from its defs - /// and uses. LI must be empty with no values. - void calculate(LiveInterval *LI); - //===--------------------------------------------------------------------===// // Mid-level interface. //===--------------------------------------------------------------------===// @@ -165,27 +159,27 @@ public: /// single existing value, Alloc may be null. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0); + void extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg = 0); /// createDeadDefs - Create a dead def in LI for every def operand of Reg. /// Each instruction defining Reg gets a new VNInfo with a corresponding /// minimal live range. - void createDeadDefs(LiveInterval *LI, unsigned Reg); + void createDeadDefs(LiveRange &LR, unsigned Reg); /// createDeadDefs - Create a dead def in LI for every def of LI->reg. - void createDeadDefs(LiveInterval *LI) { - createDeadDefs(LI, LI->reg); + void createDeadDefs(LiveInterval &LI) { + createDeadDefs(LI, LI.reg); } /// extendToUses - Extend the live range of LI to reach all uses of Reg. /// /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. - void extendToUses(LiveInterval *LI, unsigned Reg); + void extendToUses(LiveRange &LR, unsigned Reg); /// extendToUses - Extend the live range of LI to reach all uses of LI->reg. - void extendToUses(LiveInterval *LI) { - extendToUses(LI, LI->reg); + void extendToUses(LiveInterval &LI) { + extendToUses(LI, LI.reg); } //===--------------------------------------------------------------------===// @@ -216,15 +210,15 @@ public: /// function can only be called once per basic block. Once the live-in value /// has been determined, calculateValues() will add liveness to LI. /// - /// @param LI The live range that is live-in to the block. + /// @param LR The live range that is live-in to the block. /// @param DomNode The domtree node for the block. /// @param Kill Index in block where LI is killed. If the value is /// live-through, set Kill = SLotIndex() and also call /// setLiveOutValue(MBB, 0). 
- void addLiveInBlock(LiveInterval *LI, + void addLiveInBlock(LiveRange &LR, MachineDomTreeNode *DomNode, SlotIndex Kill = SlotIndex()) { - LiveIn.push_back(LiveInBlock(LI, DomNode, Kill)); + LiveIn.push_back(LiveInBlock(LR, DomNode, Kill)); } /// calculateValues - Calculate the value that will be live-in to each block diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index 7793e96..cb70c43 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -31,17 +30,23 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); void LiveRangeEdit::Delegate::anchor() { } -LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) { +LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) { unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); if (VRM) { - VRM->grow(); VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } - LiveInterval &LI = LIS.getOrCreateInterval(VReg); - NewRegs.push_back(&LI); + LiveInterval &LI = LIS.createEmptyInterval(VReg); return LI; } +unsigned LiveRangeEdit::createFrom(unsigned OldReg) { + unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + if (VRM) { + VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); + } + return VReg; +} + bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, AliasAnalysis *aa) { @@ -216,108 +221,122 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, return true; } -void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - ArrayRef<unsigned> RegsBeingSpilled) { - SetVector<LiveInterval*, - SmallVector<LiveInterval*, 8>, - SmallPtrSet<LiveInterval*, 8> > ToShrink; +/// Find all live intervals that need to shrink, then remove the instruction. +void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { + assert(MI->allDefsAreDead() && "Def isn't really dead"); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - for (;;) { - // Erase all dead defs. - while (!Dead.empty()) { - MachineInstr *MI = Dead.pop_back_val(); - assert(MI->allDefsAreDead() && "Def isn't really dead"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - - // Never delete inline asm. - if (MI->isInlineAsm()) { - DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); - continue; - } + // Never delete a bundled instruction. + if (MI->isBundled()) { + return; + } + // Never delete inline asm. + if (MI->isInlineAsm()) { + DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); + return; + } - // Use the same criteria as DeadMachineInstructionElim. - bool SawStore = false; - if (!MI->isSafeToMove(&TII, 0, SawStore)) { - DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); - continue; - } + // Use the same criteria as DeadMachineInstructionElim. + bool SawStore = false; + if (!MI->isSafeToMove(&TII, 0, SawStore)) { + DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); + return; + } - DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); - - // Collect virtual registers to be erased after MI is gone. 
- SmallVector<unsigned, 8> RegsToErase; - bool ReadsPhysRegs = false; - - // Check for live intervals that may shrink - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (!MOI->isReg()) - continue; - unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // Check if MI reads any unreserved physregs. - if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) - ReadsPhysRegs = true; - continue; - } - LiveInterval &LI = LIS.getInterval(Reg); - - // Shrink read registers, unless it is likely to be expensive and - // unlikely to change anything. We typically don't want to shrink the - // PIC base register that has lots of uses everywhere. - // Always shrink COPY uses that probably come from live range splitting. - if (MI->readsVirtualRegister(Reg) && - (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || - LI.killedAt(Idx))) - ToShrink.insert(&LI); - - // Remove defined value. - if (MOI->isDef()) { - if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { - if (TheDelegate) - TheDelegate->LRE_WillShrinkVirtReg(LI.reg); - LI.removeValNo(VNI); - if (LI.empty()) - RegsToErase.push_back(Reg); + DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + + // Collect virtual registers to be erased after MI is gone. + SmallVector<unsigned, 8> RegsToErase; + bool ReadsPhysRegs = false; + + // Check for live intervals that may shrink + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg()) + continue; + unsigned Reg = MOI->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) + ReadsPhysRegs = true; + else if (MOI->isDef()) { + for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo()); + Units.isValid(); ++Units) { + if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { + if (VNInfo *VNI = LR->getVNInfoAt(Idx)) + LR->removeValNo(VNI); } } } - - // Currently, we don't support DCE of physreg live ranges. If MI reads - // any unreserved physregs, don't erase the instruction, but turn it into - // a KILL instead. This way, the physreg live ranges don't end up - // dangling. - // FIXME: It would be better to have something like shrinkToUses() for - // physregs. That could potentially enable more DCE and it would free up - // the physreg. It would not happen often, though. - if (ReadsPhysRegs) { - MI->setDesc(TII.get(TargetOpcode::KILL)); - // Remove all operands that aren't physregs. - for (unsigned i = MI->getNumOperands(); i; --i) { - const MachineOperand &MO = MI->getOperand(i-1); - if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - MI->RemoveOperand(i-1); - } - DEBUG(dbgs() << "Converted physregs to:\t" << *MI); - } else { + continue; + } + LiveInterval &LI = LIS.getInterval(Reg); + + // Shrink read registers, unless it is likely to be expensive and + // unlikely to change anything. We typically don't want to shrink the + // PIC base register that has lots of uses everywhere. + // Always shrink COPY uses that probably come from live range splitting. + if (MI->readsVirtualRegister(Reg) && + (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || + LI.Query(Idx).isKill())) + ToShrink.insert(&LI); + + // Remove defined value. 
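Although eliminateDeadDef is now factored out, the driver further below keeps the old fixed-point shape: drain the dead list, then shrink the collected candidates, which can expose new dead defs, and repeat. The control flow reduced to toy containers (the callables stand in for eliminateDeadDef and shrinkToUses):

#include <vector>

// Alternate erase and shrink phases until neither produces more work.
// eliminate() may append to toShrink; shrink() may append to dead.
template <typename EliminateFn, typename ShrinkFn>
static void dceFixedPoint(std::vector<int> &dead, EliminateFn eliminate,
                          ShrinkFn shrink) {
  std::vector<int> toShrink;
  for (;;) {
    while (!dead.empty()) {
      int mi = dead.back();
      dead.pop_back();
      eliminate(mi, toShrink);
    }
    if (toShrink.empty())
      break;
    int li = toShrink.back();
    toShrink.pop_back();
    shrink(li, dead);
  }
}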
+ if (MOI->isDef()) { + if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { if (TheDelegate) - TheDelegate->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + TheDelegate->LRE_WillShrinkVirtReg(LI.reg); + LI.removeValNo(VNI); + if (LI.empty()) + RegsToErase.push_back(Reg); } + } + } - // Erase any virtregs that are now empty and unused. There may be <undef> - // uses around. Keep the empty live range in that case. - for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { - unsigned Reg = RegsToErase[i]; - if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { - ToShrink.remove(&LIS.getInterval(Reg)); - eraseVirtReg(Reg); - } - } + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. + if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } + + // Erase any virtregs that are now empty and unused. There may be <undef> + // uses around. Keep the empty live range in that case. + for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { + unsigned Reg = RegsToErase[i]; + if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { + ToShrink.remove(&LIS.getInterval(Reg)); + eraseVirtReg(Reg); + } + } +} + +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, + ArrayRef<unsigned> RegsBeingSpilled) { + ToShrinkSet ToShrink; + + for (;;) { + // Erase all dead defs. + while (!Dead.empty()) + eliminateDeadDef(Dead.pop_back_val(), ToShrink); if (ToShrink.empty()) break; @@ -331,7 +350,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, TheDelegate->LRE_WillShrinkVirtReg(LI->reg); if (!LIS.shrinkToUses(LI, &Dead)) continue; - + // Don't create new intervals for a register being spilled. // The new intervals would have to be spilled anyway so its not worth it. // Also they currently aren't spilled so creating them and not spilling @@ -343,11 +362,11 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, break; } } - + if (BeingSpilled) continue; // LI may have been separated, create new intervals. - LI->RenumberValues(LIS); + LI->RenumberValues(); ConnectedVNInfoEqClasses ConEQ(LIS); unsigned NumComp = ConEQ.Classify(LI); if (NumComp <= 1) @@ -357,7 +376,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector<LiveInterval*, 8> Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createFrom(LI->reg)); + Dups.push_back(&createEmptyIntervalFrom(LI->reg)); // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. 
The original // interval must contain all the split products, and LI doesn't. @@ -374,14 +393,27 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } } -void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, - const MachineLoopInfo &Loops) { - VirtRegAuxInfo VRAI(MF, LIS, Loops); - for (iterator I = begin(), E = end(); I != E; ++I) { - LiveInterval &LI = **I; +// Keep track of new virtual registers created via +// MachineRegisterInfo::createVirtualRegister. +void +LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg) +{ + if (VRM) + VRM->grow(); + + NewRegs.push_back(VReg); +} + +void +LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, + const MachineLoopInfo &Loops, + const MachineBlockFrequencyInfo &MBFI) { + VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI); + for (unsigned I = 0, Size = size(); I < Size; ++I) { + LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to " << MRI.getRegClass(LI.reg)->getName() << '\n'); - VRAI.CalculateWeightAndHint(LI); + VRAI.calculateSpillWeightAndHint(LI); } } diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp index 0ef069f..1d801ac 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -119,9 +119,11 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, if (VirtReg.empty()) return false; CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) - if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes())) + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + const LiveRange &UnitRange = LIS->getRegUnit(*Units); + if (VirtReg.overlaps(UnitRange, CP, *LIS->getSlotIndexes())) return true; + } return false; } diff --git a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp new file mode 100644 index 0000000..6221ca2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -0,0 +1,111 @@ +//===-- LiveInterval.cpp - Live Interval Representation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveRegUnits utility for tracking liveness of +// physical register units across machine instructions in forward or backward +// order. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +using namespace llvm; + +/// Return true if the given MachineOperand clobbers the given register unit. +/// A register unit is only clobbered if all its super-registers are clobbered. +static bool operClobbersUnit(const MachineOperand *MO, unsigned Unit, + const MCRegisterInfo *MCRI) { + for (MCRegUnitRootIterator RI(Unit, MCRI); RI.isValid(); ++RI) { + for (MCSuperRegIterator SI(*RI, MCRI, true); SI.isValid(); ++SI) { + if (!MO->clobbersPhysReg(*SI)) + return false; + } + } + return true; +} + +/// We assume the high bits of a physical super register are not preserved +/// unless the instruction has an implicit-use operand reading the +/// super-register or a register unit for the upper bits is available. 
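operClobbersUnit above encodes the conservative rule spelled out in the preceding comment: a register unit only dies to a regmask when every super-register of every root containing it is clobbered, since any surviving super-register may still carry the unit's bits. The same rule restated over a toy model, where rootSupers[r][s] says whether super-register s of root r is clobbered:

#include <vector>

// Clobbered only if no register containing the unit survives the mask.
static bool unitClobbered(const std::vector<std::vector<bool>> &rootSupers) {
  for (const auto &supers : rootSupers)
    for (bool clobbered : supers)
      if (!clobbered)
        return false;  // some containing register is preserved
  return true;
}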
+void LiveRegUnits::removeRegsInMask(const MachineOperand &Op, + const MCRegisterInfo &MCRI) { + SparseSet<unsigned>::iterator LUI = LiveUnits.begin(); + while (LUI != LiveUnits.end()) { + if (operClobbersUnit(&Op, *LUI, &MCRI)) + LUI = LiveUnits.erase(LUI); + else + ++LUI; + } +} + +void LiveRegUnits::stepBackward(const MachineInstr &MI, + const MCRegisterInfo &MCRI) { + // Remove defined registers and regmask kills from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + if (!O->isDef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + removeReg(Reg, MCRI); + } else if (O->isRegMask()) { + removeRegsInMask(*O, MCRI); + } + } + // Add uses to the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->readsReg() || O->isUndef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + addReg(Reg, MCRI); + } +} + +/// Uses with kill flag get removed from the set, defs added. If possible +/// use StepBackward() instead of this function because some kill flags may +/// be missing. +void LiveRegUnits::stepForward(const MachineInstr &MI, + const MCRegisterInfo &MCRI) { + SmallVector<unsigned, 4> Defs; + // Remove killed registers from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + if (O->isDef()) { + if (!O->isDead()) + Defs.push_back(Reg); + } else { + if (!O->isKill()) + continue; + assert(O->isUse()); + removeReg(Reg, MCRI); + } + } else if (O->isRegMask()) { + removeRegsInMask(*O, MCRI); + } + } + // Add defs to the set. + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + addReg(Defs[i], MCRI); + } +} + +/// Adds all registers in the live-in list of block @p BB. +void LiveRegUnits::addLiveIns(const MachineBasicBlock *MBB, + const MCRegisterInfo &MCRI) { + for (MachineBasicBlock::livein_iterator L = MBB->livein_begin(), + LE = MBB->livein_end(); L != LE; ++L) { + addReg(*L, MCRI); + } +} diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index 789eddc..ed55d7a 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -217,8 +217,8 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, continue; unsigned DefReg = MO.getReg(); if (TRI->isSubRegister(Reg, DefReg)) { - PartDefRegs.insert(DefReg); - for (MCSubRegIterator SubRegs(DefReg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) PartDefRegs.insert(*SubRegs); } } @@ -271,8 +271,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { true/*IsImp*/)); // Remember this use. 
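Typical use of the LiveRegUnits utility introduced above is a bottom-up scan, calling stepBackward per instruction so the set describes liveness just above each point. A sketch against the interface added in this file; it assumes the caller has seeded Units with the block's live-outs, and the headers and iterator types are those of this llvm snapshot:

#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"

using namespace llvm;

// After each step, Units holds the registers live immediately before *I.
// stepBackward is preferred over stepForward since it needs no kill flags.
static void scanBlockBackwards(const MachineBasicBlock &MBB,
                               const MCRegisterInfo &MCRI,
                               LiveRegUnits &Units) {
  for (MachineBasicBlock::const_reverse_iterator I = MBB.rbegin(),
                                                 E = MBB.rend();
       I != E; ++I)
    Units.stepBackward(*I, MCRI);
}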
- PhysRegUse[Reg] = MI; - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) PhysRegUse[*SubRegs] = MI; } @@ -350,8 +350,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { continue; } if (MachineInstr *Use = PhysRegUse[SubReg]) { - PartUses.insert(SubReg); - for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); SS.isValid(); + ++SS) PartUses.insert(*SS); unsigned Dist = DistanceMap[Use]; if (Dist > LastRefOrPartRefDist) { @@ -387,8 +387,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { LastSubRef->addRegisterKilled(SubReg, TRI, true); else { LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); - PhysRegUse[SubReg] = LastRefOrPartRef; - for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); + SS.isValid(); ++SS) PhysRegUse[*SS] = LastRefOrPartRef; } for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) @@ -441,12 +441,12 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) { } void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, - SmallVector<unsigned, 4> &Defs) { + SmallVectorImpl<unsigned> &Defs) { // What parts of the register are previously defined? SmallSet<unsigned, 32> Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { - Live.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) Live.insert(*SubRegs); } else { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { @@ -460,8 +460,8 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, if (Live.count(SubReg)) continue; if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { - Live.insert(SubReg); - for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); + SS.isValid(); ++SS) Live.insert(*SS); } } @@ -484,13 +484,12 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, } void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, - SmallVector<unsigned, 4> &Defs) { + SmallVectorImpl<unsigned> &Defs) { while (!Defs.empty()) { unsigned Reg = Defs.back(); Defs.pop_back(); - PhysRegDef[Reg] = MI; - PhysRegUse[Reg] = NULL; - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; PhysRegDef[SubReg] = MI; PhysRegUse[SubReg] = NULL; @@ -610,9 +609,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // if they have PHI nodes, and if so, we simulate an assignment at the end // of the current block. if (!PHIVarInfo[MBB->getNumber()].empty()) { - SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()]; + SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()]; - for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(), + for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(), E = VarInfoVec.end(); I != E; ++I) // Mark it alive only in the block we are representing. 
MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 91810bd..ca71e3b 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -52,7 +52,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
 if (!CachedMCSymbol) {
 const MachineFunction *MF = getParent();
 MCContext &Ctx = MF->getContext();
- const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
+ const char *Prefix = Ctx.getAsmInfo()->getPrivateGlobalPrefix();
 CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
 Twine(MF->getFunctionNumber()) +
 "_" + Twine(getNumber()));
@@ -861,7 +861,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
 LiveInterval &LI = LIS->getInterval(Reg);
 VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
 assert(VNI && "PHI sources should be live out of their predecessors.");
- LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
 }
 }
 }
@@ -880,9 +880,9 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
 if (isLiveOut && isLastMBB) {
 VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
 assert(VNI && "LiveInterval should have VNInfo where it is live.");
- LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
 } else if (!isLiveOut && !isLastMBB) {
- LI.removeRange(StartIndex, EndIndex);
+ LI.removeSegment(StartIndex, EndIndex);
 }
 }
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 070daf2..e269d24 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -50,11 +50,6 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
 return false;
 }
-/// getblockFreq - Return block frequency. Return 0 if we don't have the
-/// information. Please note that initial frequency is equal to 1024. It means
-/// that we should not rely on the value itself, but only on the comparison to
-/// the other block frequencies. We do this to avoid using of floating points.
-///
 BlockFrequency MachineBlockFrequencyInfo::
 getBlockFreq(const MachineBasicBlock *MBB) const {
 return MBFI->getBlockFreq(MBB);
 }
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index bfba503..4b0f7f3 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -991,6 +991,28 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
 Cond.clear();
 MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
 if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // The "PrevBB" is not yet updated to reflect current code layout, so,
+ // o. it may fall-through to a block without explicit "goto" instruction
+ // before layout, and no longer fall-through it after layout; or
+ // o. just opposite.
+ //
+ // AnalyzeBranch() may return erroneous value for FBB when these two
+ // situations take place. For the first scenario FBB is mistakenly set
+ // NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
+ // is mistakenly pointing to "*BI".
+ // + bool needUpdateBr = true; + if (!Cond.empty() && (!FBB || FBB == *BI)) { + PrevBB->updateTerminator(); + needUpdateBr = false; + Cond.clear(); + TBB = FBB = 0; + if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // FIXME: This should never take place. + TBB = FBB = 0; + } + } + // If PrevBB has a two-way branch, try to re-order the branches // such that we branch to the successor with higher weight first. if (TBB && !Cond.empty() && FBB && @@ -1003,8 +1025,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + needUpdateBr = true; } - PrevBB->updateTerminator(); + if (needUpdateBr) + PrevBB->updateTerminator(); } } diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 61d8d38..d228286 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -84,11 +84,11 @@ namespace { bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, @@ -193,7 +193,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &PhysUseDef) const{ // First, add all uses to PhysRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -244,7 +244,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &NonLocal) const { // For now conservatively returns false if the common subexpression is // not in the same basic block as the given instruction. 
The only exception diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index dc8a224..4f48e2c 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -213,9 +213,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { CopyMap.erase(*AI); AvailCopyMap.erase(*AI); } - CopyMap[Def] = MI; - AvailCopyMap[Def] = MI; - for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) { + for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid(); + ++SR) { CopyMap[*SR] = MI; AvailCopyMap[*SR] = MI; } diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index 04321f3..0703df0 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -54,23 +55,28 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, GCModuleInfo* gmi) : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) { if (TM.getRegisterInfo()) - RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo()); + RegInfo = new (Allocator) MachineRegisterInfo(TM); else RegInfo = 0; + MFInfo = 0; - FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(), - TM.Options.RealignStack); + FrameInfo = + new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack")); + if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getAttributes(). getStackAlignment(AttributeSet::FunctionIndex)); - ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout()); + + ConstantPool = new (Allocator) MachineConstantPool(TM); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); + // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) Alignment = std::max(Alignment, TM.getTargetLowering()->getPrefFunctionAlignment()); + FunctionNumber = FunctionNum; JumpTableInfo = 0; } @@ -456,11 +462,15 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const { // MachineFrameInfo implementation //===----------------------------------------------------------------------===// +const TargetFrameLowering *MachineFrameInfo::getFrameLowering() const { + return TM.getFrameLowering(); +} + /// ensureMaxAlignment - Make sure the function is at least Align bytes /// aligned. 
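A second convention applied uniformly in this import (LiveVariables, MachineCSE, MachineLICM, MachineSSAUpdater): helper signatures take SmallVectorImpl<T>& rather than SmallVector<T, N>&, so the inline element count is no longer baked into the interface. A sketch of the convention with a hypothetical helper, not taken from the diff:

    // Callers choose the inline capacity; every size binds to the
    // common SmallVectorImpl base.
    static void collectRegs(const MachineInstr &MI,
                            SmallVectorImpl<unsigned> &Out) {
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i)
        if (MI.getOperand(i).isReg())
          Out.push_back(MI.getOperand(i).getReg());
    }

    SmallVector<unsigned, 4> A;
    SmallVector<unsigned, 32> B;
    collectRegs(MI, A); // fine
    collectRegs(MI, B); // also fine, same function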
void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { - if (!TFI.isStackRealignable() || !RealignOption) - assert(Align <= TFI.getStackAlignment() && + if (!getFrameLowering()->isStackRealignable() || !RealignOption) + assert(Align <= getFrameLowering()->getStackAlignment() && "For targets without stack realignment, Align is out of limit!"); if (MaxAlignment < Align) MaxAlignment = Align; } @@ -482,8 +492,10 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, Alloca)); int Index = (int)Objects.size() - NumFixedObjects - 1; @@ -498,8 +510,10 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); CreateStackObject(Size, Alignment, true, false); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -513,8 +527,10 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, /// int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { HasVarSizedObjects = true; - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -532,10 +548,12 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // the incoming frame position. If the frame object is at offset 32 and // the stack is guaranteed to be 16-byte aligned, then we know that the // object is 16-byte aligned. 
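The offset example in the comment above is MinAlign arithmetic: the known alignment of a fixed object is the largest power of two dividing both the SP offset and the stack alignment. A self-contained sketch of that computation (LLVM's own MinAlign lives in Support/MathExtras.h):

    #include <cstdint>

    // Largest power of two dividing both A and B: the lowest set bit of A|B.
    static uint64_t minAlign(uint64_t A, uint64_t B) {
      return (A | B) & (1 + ~(A | B));
    }
    // minAlign(32, 16) == 16: offset 32 on a 16-byte-aligned stack stays
    // 16-byte aligned (the case described above).
    // minAlign(12, 16) == 4: offset 12 only guarantees 4-byte alignment.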
- unsigned StackAlign = TFI.getStackAlignment(); + unsigned StackAlign = getFrameLowering()->getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); - Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Align, TFI.getStackAlignment()); + Align = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*NeedSP*/ false, @@ -769,6 +787,10 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } +const DataLayout *MachineConstantPool::getDataLayout() const { + return TM.getDataLayout(); +} + Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); @@ -850,7 +872,8 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, // FIXME, this could be made much more efficient for large constant pools. for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (!Constants[i].isMachineConstantPoolEntry() && - CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) { + CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, + getDataLayout())) { if ((unsigned)Constants[i].getAlignment() < Alignment) Constants[i].Alignment = Alignment; return i; @@ -887,7 +910,7 @@ void MachineConstantPool::print(raw_ostream &OS) const { if (Constants[i].isMachineConstantPoolEntry()) Constants[i].Val.MachineCPVal->print(OS); else - OS << *(const Value*)Constants[i].Val.ConstVal; + WriteAsOperand(OS, Constants[i].Val.ConstVal, /*PrintType=*/false); OS << ", align=" << Constants[i].getAlignment(); OS << "\n"; } diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 32d0668..295b450 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -647,12 +647,15 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { } } +#ifndef NDEBUG + bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata; // OpNo now points as the desired insertion point. Unless this is a variadic // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). // RegMask operands go between the explicit and implicit operands. assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || - OpNo < MCID->getNumOperands()) && + OpNo < MCID->getNumOperands() || isMetaDataOp) && "Trying to add an operand to a machine instr that is already done!"); +#endif MachineRegisterInfo *MRI = getRegInfo(); @@ -1253,32 +1256,6 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, return true; } -/// isSafeToReMat - Return true if it's safe to rematerialize the specified -/// instruction which defined the specified register instead of copying it. -bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, - AliasAnalysis *AA, - unsigned DstReg) const { - bool SawStore = false; - if (!TII->isTriviallyReMaterializable(this, AA) || - !isSafeToMove(TII, AA, SawStore)) - return false; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); - if (!MO.isReg()) - continue; - // FIXME: For now, do not remat any instruction with register operands. - // Later on, we can loosen the restriction is the register operands have - // not been modified between the def and use. 
Note, this is different from - // MachineSink because the code is no longer in two-address form (at least - // partially). - if (MO.isUse()) - return false; - else if (!MO.isDead() && MO.getReg() != DstReg) - return false; - } - return true; -} - /// hasOrderedMemoryRef - Return true if this instruction may have an ordered /// or volatile memory reference, or if the information describing the memory /// reference is not available. Return false if it is known to have no ordered @@ -1411,8 +1388,10 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, const LLVMContext &Ctx = MF->getFunction()->getContext(); if (!DL.isUnknown()) { // Print source line info. DIScope Scope(DL.getScope(Ctx)); + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); // Omit the directory, because it's likely to be long and uninteresting. - if (Scope.Verify()) + if (Scope) CommentOS << Scope.getFilename(); else CommentOS << "<unknown>"; @@ -1726,31 +1705,31 @@ void MachineInstr::clearRegisterKills(unsigned Reg, } } -bool MachineInstr::addRegisterDead(unsigned IncomingReg, +bool MachineInstr::addRegisterDead(unsigned Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg); bool hasAliases = isPhysReg && - MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); + MCRegAliasIterator(Reg, RegInfo, false).isValid(); bool Found = false; SmallVector<unsigned,4> DeadOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!Reg) + unsigned MOReg = MO.getReg(); + if (!MOReg) continue; - if (Reg == IncomingReg) { + if (MOReg == Reg) { MO.setIsDead(); Found = true; } else if (hasAliases && MO.isDead() && - TargetRegisterInfo::isPhysicalRegister(Reg)) { + TargetRegisterInfo::isPhysicalRegister(MOReg)) { // There exists a super-register that's marked dead. 
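    // For intuition about the two cases below, take x86 register nesting
    // as a purely illustrative example: AX is a sub-register of EAX,
    // which is a sub-register of RAX.
    //  - addRegisterDead(AX, ...) meeting a dead def of EAX: EAX is a
    //    super-register of AX, so AX's death is already implied; return.
    //  - addRegisterDead(EAX, ...) meeting a dead def of AX: that
    //    narrower dead marker becomes redundant, so its operand index
    //    goes into DeadOps for cleanup.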
- if (RegInfo->isSuperRegister(IncomingReg, Reg)) + if (RegInfo->isSuperRegister(Reg, MOReg)) return true; - if (RegInfo->isSubRegister(IncomingReg, Reg)) + if (RegInfo->isSubRegister(Reg, MOReg)) DeadOps.push_back(i); } } @@ -1770,7 +1749,7 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, if (Found || !AddIfNotFound) return Found; - addOperand(MachineOperand::CreateReg(IncomingReg, + addOperand(MachineOperand::CreateReg(Reg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/, @@ -1778,21 +1757,21 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, return true; } -void MachineInstr::addRegisterDefined(unsigned IncomingReg, +void MachineInstr::addRegisterDefined(unsigned Reg, const TargetRegisterInfo *RegInfo) { - if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) { - MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + MachineOperand *MO = findRegisterDefOperand(Reg, false, RegInfo); if (MO) return; } else { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); - if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() && + if (MO.isReg() && MO.getReg() == Reg && MO.isDef() && MO.getSubReg() == 0) return; } } - addOperand(MachineOperand::CreateReg(IncomingReg, + addOperand(MachineOperand::CreateReg(Reg, true /*IsDef*/, true /*IsImp*/)); } diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index ed3ed4d..104eacd 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -172,7 +172,7 @@ namespace { BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, - SmallVector<CandidateInfo, 32> &Candidates); + SmallVectorImpl<CandidateInfo> &Candidates); /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the /// current loop. @@ -404,7 +404,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, - SmallVector<CandidateInfo, 32> &Candidates) { + SmallVectorImpl<CandidateInfo> &Candidates) { bool RuledOut = false; bool HasNonInvariantUse = false; unsigned Def = 0; @@ -468,12 +468,12 @@ void MachineLICM::ProcessMI(MachineInstr *MI, for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { if (PhysRegDefs.test(*AS)) PhysRegClobbers.set(*AS); - if (PhysRegClobbers.test(*AS)) - // MI defined register is seen defined by another instruction in - // the loop, it cannot be a LICM candidate. - RuledOut = true; PhysRegDefs.set(*AS); } + if (PhysRegClobbers.test(Reg)) + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. + RuledOut = true; } // Only consider reloads for now and remats which do not have register @@ -502,7 +502,7 @@ void MachineLICM::HoistRegionPostRA() { // Walk the entire region, count number of defs for each register, and // collect potential LICM candidates. - const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); + const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *BB = Blocks[i]; @@ -584,7 +584,7 @@ void MachineLICM::HoistRegionPostRA() { /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current /// loop, and make sure it is not killed by any instructions in the loop. 
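Both MachineLICM hunks (HoistRegionPostRA above, AddToLiveIns below) change a by-value binding of the loop's block list into a reference; the old declaration copied the entire std::vector on every call. The change in isolation:

    // Before: a const-qualified *copy* of the vector; every element is
    // copied, then the whole thing is discarded at end of scope.
    const std::vector<MachineBasicBlock *> Blocks = CurLoop->getBlocks();

    // After: a reference to the vector owned by the loop; no copy.
    const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();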
void MachineLICM::AddToLiveIns(unsigned Reg) { - const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); + const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *BB = Blocks[i]; if (!BB->isLiveIn(Reg)) @@ -1084,7 +1084,7 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, return true; for (unsigned i = BackTrace.size(); i != 0; --i) { - SmallVector<unsigned, 8> &RP = BackTrace[i-1]; + SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; if (RP[RCId] + Cost >= Limit) return true; } @@ -1130,7 +1130,7 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { // Update register pressure of blocks from loop header to current block. for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { - SmallVector<unsigned, 8> &RP = BackTrace[i]; + SmallVectorImpl<unsigned> &RP = BackTrace[i]; for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { unsigned RCId = CI->first; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 8af9d05..bb54284 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -253,13 +253,12 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, const MCObjectFileInfo *MOFI) - : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) { + : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, 0, false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } MachineModuleInfo::MachineModuleInfo() - : ImmutablePass(ID), - Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) { + : ImmutablePass(ID), Context(0, 0, 0) { llvm_unreachable("This MachineModuleInfo constructor should never be called, " "MMI should always be explicitly constructed by " "LLVMTargetMachine"); @@ -303,7 +302,7 @@ bool MachineModuleInfo::doFinalization(Module &M) { /// void MachineModuleInfo::EndFunction() { // Clean up frame info. - FrameMoves.clear(); + FrameInstructions.clear(); // Clean up exception info. LandingPads.clear(); diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 68372f6..f8b8796 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -19,16 +19,21 @@ using namespace llvm; -MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) - : TRI(&TRI), IsSSA(true), TracksLiveness(true) { +// Pin the vtable to this file. +void MachineRegisterInfo::Delegate::anchor() {} + +MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM) + : TM(TM), TheDelegate(0), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedRegUnits.resize(TRI.getNumRegUnits()); - UsedPhysRegMask.resize(TRI.getNumRegs()); + UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); + UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs()); // Create the physreg use/def lists. 
- PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; - memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); + PhysRegUseDefLists = + new MachineOperand*[getTargetRegisterInfo()->getNumRegs()]; + memset(PhysRegUseDefLists, 0, + sizeof(MachineOperand*)*getTargetRegisterInfo()->getNumRegs()); } MachineRegisterInfo::~MachineRegisterInfo() { @@ -50,7 +55,8 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, const TargetRegisterClass *OldRC = getRegClass(Reg); if (OldRC == RC) return RC; - const TargetRegisterClass *NewRC = TRI->getCommonSubClass(OldRC, RC); + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getCommonSubClass(OldRC, RC); if (!NewRC || NewRC == OldRC) return NewRC; if (NewRC->getNumRegs() < MinNumRegs) @@ -63,7 +69,8 @@ bool MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); - const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); // Stop early if there is no room to grow. if (NewRC == OldRC) @@ -73,14 +80,16 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; ++I) { const TargetRegisterClass *OpRC = - I->getRegClassConstraint(I.getOperandNo(), TII, TRI); + I->getRegClassConstraint(I.getOperandNo(), TII, + getTargetRegisterInfo()); if (unsigned SubIdx = I.getOperand().getSubReg()) { if (OpRC) - NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx); + NewRC = getTargetRegisterInfo()->getMatchingSuperRegClass(NewRC, OpRC, + SubIdx); else - NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx); + NewRC = getTargetRegisterInfo()->getSubClassWithSubReg(NewRC, SubIdx); } else if (OpRC) - NewRC = TRI->getCommonSubClass(NewRC, OpRC); + NewRC = getTargetRegisterInfo()->getCommonSubClass(NewRC, OpRC); if (!NewRC || NewRC == OldRC) return false; } @@ -102,6 +111,8 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ VRegInfo.grow(Reg); VRegInfo[Reg].first = RegClass; RegAllocHints.grow(Reg); + if (TheDelegate) + TheDelegate->MRI_NoteNewVirtualRegister(Reg); return Reg; } @@ -126,24 +137,28 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { MachineOperand *MO = &I.getOperand(); MachineInstr *MI = MO->getParent(); if (!MI) { - errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO << " has no parent instruction.\n"; Valid = false; } MachineOperand *MO0 = &MI->getOperand(0); unsigned NumOps = MI->getNumOperands(); if (!(MO >= MO0 && MO < MO0+NumOps)) { - errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO << " doesn't belong to parent MI: " << *MI; Valid = false; } if (!MO->isReg()) { - errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " MachineOperand " << MO << ": " << *MO << " is not a register\n"; Valid = false; } if (MO->getReg() != Reg) { - errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": " + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use-list MachineOperand " << MO << ": " << *MO << " is the wrong register\n"; Valid = false; } @@ 
-156,7 +171,7 @@ void MachineRegisterInfo::verifyUseLists() const { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) verifyUseList(TargetRegisterInfo::index2VirtReg(i)); - for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) + for (unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i) verifyUseList(i); #endif } @@ -390,8 +405,8 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const { #endif void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { - ReservedRegs = TRI->getReservedRegs(MF); - assert(ReservedRegs.size() == TRI->getNumRegs() && + ReservedRegs = getTargetRegisterInfo()->getReservedRegs(MF); + assert(ReservedRegs.size() == getTargetRegisterInfo()->getNumRegs() && "Invalid ReservedRegs vector from target"); } @@ -401,7 +416,8 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, // Check if any overlapping register is modified, or allocatable so it may be // used later. - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) + for (MCRegAliasIterator AI(PhysReg, getTargetRegisterInfo(), true); + AI.isValid(); ++AI) if (!def_empty(*AI) || isAllocatable(*AI)) return false; return true; diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp index bb6aad7..17f0af8 100644 --- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -77,7 +77,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) { static unsigned LookForIdenticalPHI(MachineBasicBlock *BB, - SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) { + SmallVectorImpl<std::pair<MachineBasicBlock*, unsigned> > &PredValues) { if (BB->empty()) return 0; diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index fff6b2b..e71c4df 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDFS.h" @@ -30,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include <queue> using namespace llvm; @@ -51,10 +53,11 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// FIXME: remove this flag after initial testing. It should always be a good -// thing. -static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden, - cl::desc("Constrain vreg copies."), cl::init(true)); +static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden, + cl::desc("Enable register pressure scheduling."), cl::init(true)); + +static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden, + cl::desc("Enable cyclic critical path analysis."), cl::init(true)); static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -69,6 +72,10 @@ static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden, // DAG subtrees must have at least this many nodes. static const unsigned MinSubtreeSize = 8; +// Pin the vtables to this file. 
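This import adds several empty anchor() definitions (MachineRegisterInfo::Delegate above, the two scheduler classes here). Defining one virtual function out-of-line in a single .cpp gives the class a key function, so the compiler emits its vtable in that one object file rather than in every translation unit that uses the class. The idiom in isolation:

    // In the header: one deliberately non-inline virtual method.
    class ScheduleDAGMutation {
      virtual void anchor(); // no body in the header
    public:
      virtual ~ScheduleDAGMutation() {}
      // ... interface ...
    };

    // In exactly one .cpp file: the definition that pins the vtable.
    void ScheduleDAGMutation::anchor() {}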
+void MachineSchedStrategy::anchor() {} +void ScheduleDAGMutation::anchor() {} + //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// @@ -98,6 +105,9 @@ public: virtual void print(raw_ostream &O, const Module* = 0) const; static char ID; // Class identification, replacement for typeinfo + +protected: + ScheduleDAGInstrs *createMachineScheduler(); }; } // namespace @@ -152,12 +162,13 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.", /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); +static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C); /// Decrement this iterator until reaching the top or a non-debug instr. -static MachineBasicBlock::iterator -priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { +static MachineBasicBlock::const_iterator +priorNonDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator Beg) { assert(I != Beg && "reached the top of the region, cannot decrement"); while (--I != Beg) { if (!I->isDebugValue()) @@ -166,10 +177,19 @@ priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { return I; } +/// Non-const version. +static MachineBasicBlock::iterator +priorNonDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator Beg) { + return const_cast<MachineInstr*>( + &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)); +} + /// If this iterator is a debug value, increment until reaching the End or a /// non-debug instruction. -static MachineBasicBlock::iterator -nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { +static MachineBasicBlock::const_iterator +nextIfDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator End) { for(; I != End; ++I) { if (!I->isDebugValue()) break; @@ -177,6 +197,34 @@ nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { return I; } +/// Non-const version. +static MachineBasicBlock::iterator +nextIfDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator End) { + // Cast the return value to nonconst MachineInstr, then cast to an + // instr_iterator, which does not check for null, finally return a + // bundle_iterator. + return MachineBasicBlock::instr_iterator( + const_cast<MachineInstr*>( + &*nextIfDebug(MachineBasicBlock::const_iterator(I), End))); +} + +/// Instantiate a ScheduleDAGInstrs that will be owned by the caller. +ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { + // Select the scheduler, or set the default. + MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; + if (Ctor != useDefaultMachineSched) + return Ctor(this); + + // Get the default scheduler set by the target for this function. + ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this); + if (Scheduler) + return Scheduler; + + // Default to GenericScheduler. + return createGenericSched(this); +} + /// Top-level MachineScheduler pass driver. /// /// Visit blocks in function order. 
Divide each block into scheduling regions @@ -207,23 +255,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); if (VerifyScheduling) { - DEBUG(LIS->print(dbgs())); + DEBUG(LIS->dump()); MF->verify(this, "Before machine scheduling."); } RegClassInfo->runOnMachineFunction(*MF); - // Select the scheduler, or set the default. - MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; - if (Ctor == useDefaultMachineSched) { - // Get the default scheduler set by the target. - Ctor = MachineSchedRegistry::getDefault(); - if (!Ctor) { - Ctor = createConvergingSched; - MachineSchedRegistry::setDefault(Ctor); - } - } - // Instantiate the selected scheduler. - OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this)); + // Instantiate the selected scheduler for this target, function, and + // optimization level. + OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); // Visit all machine basic blocks. // @@ -258,14 +297,15 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // The next region starts above the previous region. Look backward in the // instruction stream until we find the nearest boundary. + unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingInstrs) { + for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) { if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) break; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs); + Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). if (I == RegionEnd || I == llvm::prior(RegionEnd)) { @@ -280,7 +320,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; - dbgs() << " Remaining: " << RemainingInstrs << "\n"); + dbgs() << " RegionInstrs: " << NumRegionInstrs + << " Remaining: " << RemainingInstrs << "\n"); // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. @@ -297,7 +338,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { Scheduler->finishBlock(); } Scheduler->finalizeSchedule(); - DEBUG(LIS->print(dbgs())); + DEBUG(LIS->dump()); if (VerifyScheduling) MF->verify(this, "After machine scheduling."); return true; @@ -309,7 +350,7 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ReadyQueue::dump() { - dbgs() << " " << Name << ": "; + dbgs() << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; @@ -449,13 +490,19 @@ bool ScheduleDAGMI::checkSchedLimit() { void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount) + unsigned regioninstrs) { - ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); // For convenience remember the end of the liveness region. LiveRegionEnd = (RegionEnd == bb->end()) ? 
RegionEnd : llvm::next(RegionEnd); + + SUPressureDiffs.clear(); + + SchedImpl->initPolicy(begin, end, regioninstrs); + + ShouldTrackPressure = SchedImpl->shouldTrackPressure(); } // Setup the register pressure trackers for the top scheduled top and bottom @@ -467,7 +514,7 @@ void ScheduleDAGMI::initRegPressure() { // Close the RPTracker to finalize live ins. RPTracker.closeRegion(); - DEBUG(RPTracker.getPressure().dump(TRI)); + DEBUG(RPTracker.dump()); // Initialize the live ins and live outs. TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); @@ -479,9 +526,23 @@ void ScheduleDAGMI::initRegPressure() { TopRPTracker.closeTop(); BotRPTracker.closeBottom(); + BotRPTracker.initLiveThru(RPTracker); + if (!BotRPTracker.getLiveThru().empty()) { + TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); + DEBUG(dbgs() << "Live Thru: "; + dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); + }; + + // For each live out vreg reduce the pressure change associated with other + // uses of the same vreg below the live-out reaching def. + updatePressureDiffs(RPTracker.getPressure().LiveOutRegs); + // Account for liveness generated by the region boundary. - if (LiveRegionEnd != RegionEnd) - BotRPTracker.recede(); + if (LiveRegionEnd != RegionEnd) { + SmallVector<unsigned, 8> LiveUses; + BotRPTracker.recede(&LiveUses); + updatePressureDiffs(LiveUses); + } assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); @@ -491,38 +552,88 @@ void ScheduleDAGMI::initRegPressure() { const std::vector<unsigned> &RegionPressure = RPTracker.getPressure().MaxSetPressure; for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { - unsigned Limit = TRI->getRegPressureSetLimit(i); - DEBUG(dbgs() << TRI->getRegPressureSetName(i) - << "Limit " << Limit - << " Actual " << RegionPressure[i] << "\n"); - if (RegionPressure[i] > Limit) - RegionCriticalPSets.push_back(PressureElement(i, 0)); + unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); + if (RegionPressure[i] > Limit) { + DEBUG(dbgs() << TRI->getRegPressureSetName(i) + << " Limit " << Limit + << " Actual " << RegionPressure[i] << "\n"); + RegionCriticalPSets.push_back(PressureChange(i)); + } } DEBUG(dbgs() << "Excess PSets: "; for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) dbgs() << TRI->getRegPressureSetName( - RegionCriticalPSets[i].PSetID) << " "; + RegionCriticalPSets[i].getPSet()) << " "; dbgs() << "\n"); } -// FIXME: When the pressure tracker deals in pressure differences then we won't -// iterate over all RegionCriticalPSets[i]. 
void ScheduleDAGMI:: -updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) { - for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { - unsigned ID = RegionCriticalPSets[i].PSetID; - int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; - if ((int)NewMaxPressure[ID] > MaxUnits) - MaxUnits = NewMaxPressure[ID]; +updateScheduledPressure(const SUnit *SU, + const std::vector<unsigned> &NewMaxPressure) { + const PressureDiff &PDiff = getPressureDiff(SU); + unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); + for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end(); + I != E; ++I) { + if (!I->isValid()) + break; + unsigned ID = I->getPSet(); + while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) + ++CritIdx; + if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { + if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc() + && NewMaxPressure[ID] <= INT16_MAX) + RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]); + } + unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); + if (NewMaxPressure[ID] >= Limit - 2) { + DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " + << NewMaxPressure[ID] << " > " << Limit << "(+ " + << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); + } } - DEBUG( - for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) { - unsigned Limit = TRI->getRegPressureSetLimit(i); - if (NewMaxPressure[i] > Limit ) { - dbgs() << " " << TRI->getRegPressureSetName(i) << ": " - << NewMaxPressure[i] << " > " << Limit << "\n"; +} + +/// Update the PressureDiff array for liveness after scheduling this +/// instruction. +void ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { + for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) { + /// FIXME: Currently assuming single-use physregs. + unsigned Reg = LiveUses[LUIdx]; + DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); + if (!TRI->isVirtualRegister(Reg)) + continue; + + // This may be called before CurrentBottom has been initialized. However, + // BotRPTracker must have a valid position. We want the value live into the + // instruction or live out of the block, so ask for the previous + // instruction's live-out. + const LiveInterval &LI = LIS->getInterval(Reg); + VNInfo *VNI; + MachineBasicBlock::const_iterator I = + nextIfDebug(BotRPTracker.getPos(), BB->end()); + if (I == BB->end()) + VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + else { + LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I)); + VNI = LRQ.valueIn(); + } + // RegisterPressureTracker guarantees that readsReg is true for LiveUses. + assert(VNI && "No live value at use."); + for (VReg2UseMap::iterator + UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { + SUnit *SU = UI->SU; + DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr()); + // If this use comes before the reaching def, it cannot be a last use, so + // descrease its pressure change. + if (!SU->isScheduled && SU != &ExitSU) { + LiveQueryResult LRQ + = LI.Query(LIS->getInstructionIndex(SU->getInstr())); + if (LRQ.valueIn() == VNI) + getPressureDiff(SU).addPressureChange(Reg, true, &MRI); } - }); + } + } } /// schedule - Called back from MachineScheduler::runOnMachineFunction @@ -580,15 +691,23 @@ void ScheduleDAGMI::schedule() { /// Build the DAG and setup three register pressure trackers. 
void ScheduleDAGMI::buildDAGWithRegPressure() { + if (!ShouldTrackPressure) { + RPTracker.reset(); + RegionCriticalPSets.clear(); + buildSchedGraph(AA); + return; + } + // Initialize the register pressure tracker used by buildSchedGraph. - RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, + /*TrackUntiedDefs=*/true); // Account for liveness generate by the region boundary. if (LiveRegionEnd != RegionEnd) RPTracker.recede(); // Build the DAG, and compute current register pressure. - buildSchedGraph(AA, &RPTracker); + buildSchedGraph(AA, &RPTracker, &SUPressureDiffs); // Initialize top/bottom trackers after computing region pressure. initRegPressure(); @@ -631,6 +750,91 @@ void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, ExitSU.biasCriticalPath(); } +/// Compute the max cyclic critical path through the DAG. The scheduling DAG +/// only provides the critical path for single block loops. To handle loops that +/// span blocks, we could use the vreg path latencies provided by +/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently +/// available for use in the scheduler. +/// +/// The cyclic path estimation identifies a def-use pair that crosses the back +/// edge and considers the depth and height of the nodes. For example, consider +/// the following instruction sequence where each instruction has unit latency +/// and defines an epomymous virtual register: +/// +/// a->b(a,c)->c(b)->d(c)->exit +/// +/// The cyclic critical path is a two cycles: b->c->b +/// The acyclic critical path is four cycles: a->b->c->d->exit +/// LiveOutHeight = height(c) = len(c->d->exit) = 2 +/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3 +/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4 +/// LiveInDepth = depth(b) = len(a->b) = 1 +/// +/// LiveOutDepth - LiveInDepth = 3 - 1 = 2 +/// LiveInHeight - LiveOutHeight = 4 - 2 = 2 +/// CyclicCriticalPath = min(2, 2) = 2 +unsigned ScheduleDAGMI::computeCyclicCriticalPath() { + // This only applies to single block loop. + if (!BB->isSuccessor(BB)) + return 0; + + unsigned MaxCyclicLatency = 0; + // Visit each live out vreg def to find def/use pairs that cross iterations. + ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs; + for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end(); + RI != RE; ++RI) { + unsigned Reg = *RI; + if (!TRI->isVirtualRegister(Reg)) + continue; + const LiveInterval &LI = LIS->getInterval(Reg); + const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + if (!DefVNI) + continue; + + MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def); + const SUnit *DefSU = getSUnit(DefMI); + if (!DefSU) + continue; + + unsigned LiveOutHeight = DefSU->getHeight(); + unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; + // Visit all local users of the vreg def. + for (VReg2UseMap::iterator + UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { + if (UI->SU == &ExitSU) + continue; + + // Only consider uses of the phi. + LiveQueryResult LRQ = + LI.Query(LIS->getInstructionIndex(UI->SU->getInstr())); + if (!LRQ.valueIn()->isPHIDef()) + continue; + + // Assume that a path spanning two iterations is a cycle, which could + // overestimate in strange cases. This allows cyclic latency to be + // estimated as the minimum slack of the vreg's depth or height. 
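    // Working the doc comment's a->b(a,c)->c(b)->d(c)->exit example
    // through the code below (unit latencies; DefSU is c with Latency 1,
    // the cross-iteration use is b):
    //   LiveOutDepth  = depth(c) + 1 = 3;   use depth(b)  = 1
    //   LiveOutHeight = height(c)    = 2;   use height(b) = 3
    //   CyclicLatency = LiveOutDepth - depth(b)           = 2
    //   LiveInHeight  = height(b) + 1 = 4;  4 - 2 = 2, not < 2, so kept
    //   => MaxCyclicLatency = 2, the b->c->b recurrence.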
+ unsigned CyclicLatency = 0; + if (LiveOutDepth > UI->SU->getDepth()) + CyclicLatency = LiveOutDepth - UI->SU->getDepth(); + + unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency; + if (LiveInHeight > LiveOutHeight) { + if (LiveInHeight - LiveOutHeight < CyclicLatency) + CyclicLatency = LiveInHeight - LiveOutHeight; + } + else + CyclicLatency = 0; + + DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" + << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n"); + if (CyclicLatency > MaxCyclicLatency) + MaxCyclicLatency = CyclicLatency; + } + } + DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n"); + return MaxCyclicLatency; +} + /// Identify DAG roots and setup scheduler queues. void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots) { @@ -658,11 +862,13 @@ void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, SchedImpl->registerRoots(); // Advance past initial DebugValues. - assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); CurrentTop = nextIfDebug(RegionBegin, RegionEnd); - TopRPTracker.setPos(CurrentTop); - CurrentBottom = RegionEnd; + + if (ShouldTrackPressure) { + assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); + TopRPTracker.setPos(CurrentTop); + } } /// Move an instruction and update register pressure. @@ -679,10 +885,12 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { TopRPTracker.setPos(MI); } - // Update top scheduled pressure. - TopRPTracker.advance(); - assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); - updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); + if (ShouldTrackPressure) { + // Update top scheduled pressure. + TopRPTracker.advance(); + assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); + } } else { assert(SU->isBottomReady() && "node still has unscheduled dependencies"); @@ -698,10 +906,14 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { moveInstruction(MI, CurrentBottom); CurrentBottom = MI; } - // Update bottom scheduled pressure. - BotRPTracker.recede(); - assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); - updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); + if (ShouldTrackPressure) { + // Update bottom scheduled pressure. + SmallVector<unsigned, 8> LiveUses; + BotRPTracker.recede(&LiveUses); + assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); + updatePressureDiffs(LiveUses); + } } } @@ -1019,6 +1231,12 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { GlobalSegment->start)) { return; } + // If the prior global segment may be defined by the same two-address + // instruction that also defines LocalLI, then can't make a hole here. + if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start, + LocalLI->beginIndex())) { + return; + } // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise // it would be a disconnected component in the live range. assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() && @@ -1101,24 +1319,23 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) { } //===----------------------------------------------------------------------===// -// ConvergingScheduler - Implementation of the standard MachineSchedStrategy. +// GenericScheduler - Implementation of the generic MachineSchedStrategy. 
//===----------------------------------------------------------------------===// namespace { -/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// GenericScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. -class ConvergingScheduler : public MachineSchedStrategy { +class GenericScheduler : public MachineSchedStrategy { public: /// Represent the type of SchedCandidate found within a single queue. /// pickNodeBidirectional depends on these listed by decreasing priority. enum CandReason { - NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak, + NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, - TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse, - NodeOrder}; + TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; #ifndef NDEBUG - static const char *getReasonStr(ConvergingScheduler::CandReason Reason); + static const char *getReasonStr(GenericScheduler::CandReason Reason); #endif /// Policy for scheduling the next instruction in the candidate's zone. @@ -1149,7 +1366,7 @@ public: } }; - /// Store the state used by ConvergingScheduler heuristics, required for the + /// Store the state used by GenericScheduler heuristics, required for the /// lifetime of one invocation of pickNode(). struct SchedCandidate { CandPolicy Policy; @@ -1160,6 +1377,9 @@ public: // The reason for this candidate. CandReason Reason; + // Set of reasons that apply to multiple candidates. + uint32_t RepeatReasonSet; + // Register pressure values for the best candidate. RegPressureDelta RPDelta; @@ -1167,7 +1387,7 @@ public: SchedResourceDelta ResDelta; SchedCandidate(const CandPolicy &policy) - : Policy(policy), SU(NULL), Reason(NoCand) {} + : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {} bool isValid() const { return SU; } @@ -1180,6 +1400,9 @@ public: ResDelta = Best.ResDelta; } + bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } + void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } + void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); }; @@ -1188,33 +1411,27 @@ public: struct SchedRemainder { // Critical path through the DAG in expected latency. unsigned CriticalPath; + unsigned CyclicCritPath; + + // Scaled count of micro-ops left to schedule. + unsigned RemIssueCount; + + bool IsAcyclicLatencyLimited; // Unscheduled resources SmallVector<unsigned, 16> RemainingCounts; - // Critical resource for the unscheduled zone. - unsigned CritResIdx; - // Number of micro-ops left to schedule. - unsigned RemainingMicroOps; void reset() { CriticalPath = 0; + CyclicCritPath = 0; + RemIssueCount = 0; + IsAcyclicLatencyLimited = false; RemainingCounts.clear(); - CritResIdx = 0; - RemainingMicroOps = 0; } SchedRemainder() { reset(); } void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); - - unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const { - if (!SchedModel->hasInstrSchedModel()) - return 0; - - return std::max( - RemainingMicroOps * SchedModel->getMicroOpFactor(), - RemainingCounts[CritResIdx]); - } }; /// Each Scheduling boundary is associated with ready queues. It tracks the @@ -1235,8 +1452,13 @@ public: ScheduleHazardRecognizer *HazardRec; + /// Number of cycles it takes to issue the instructions scheduled in this + /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. + /// See getStalls(). 
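    // A quick numeric instance of that definition, on a hypothetical
    // machine: 10 micro-ops scheduled in this zone at issue width 4 need
    // 3 cycles (a partially filled issue group still costs a full cycle);
    // with 2 stall cycles, CurrCycle = 3 + 2 = 5.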
unsigned CurrCycle; - unsigned IssueCount; + + /// Micro-ops issued in the current cycle + unsigned CurrMOps; /// MinReadyCycle - Cycle of the soonest available instruction. unsigned MinReadyCycle; @@ -1244,52 +1466,71 @@ public: // The expected latency of the critical path in this scheduled zone. unsigned ExpectedLatency; - // Resources used in the scheduled zone beyond this boundary. - SmallVector<unsigned, 16> ResourceCounts; + // The latency of dependence chains leading into this zone. + // For each node scheduled bottom-up: DLat = max DLat, N.Depth. + // For each cycle scheduled: DLat -= 1. + unsigned DependentLatency; + + /// Count the scheduled (issued) micro-ops that can be retired by + /// time=CurrCycle assuming the first scheduled instr is retired at time=0. + unsigned RetiredMOps; + + // Count scheduled resources that have been executed. Resources are + // considered executed if they become ready in the time that it takes to + // saturate any resource including the one in question. Counts are scaled + // for direct comparison with other resources. Counts can be compared with + // MOps * getMicroOpFactor and Latency * getLatencyFactor. + SmallVector<unsigned, 16> ExecutedResCounts; + + /// Cache the max count for a single resource. + unsigned MaxExecutedResCount; // Cache the critical resources ID in this scheduled zone. - unsigned CritResIdx; + unsigned ZoneCritResIdx; // Is the scheduled region resource limited vs. latency limited. bool IsResourceLimited; - unsigned ExpectedCount; - #ifndef NDEBUG - // Remember the greatest min operand latency. - unsigned MaxMinLatency; + // Remember the greatest operand latency as an upper bound on the number of + // times we should retry the pending queue because of a hazard. + unsigned MaxObservedLatency; #endif void reset() { // A new HazardRec is created for each DAG and owned by SchedBoundary. - delete HazardRec; - + // Destroying and reconstructing it is very expensive though. So keep + // invalid, placeholder HazardRecs. + if (HazardRec && HazardRec->isEnabled()) { + delete HazardRec; + HazardRec = 0; + } Available.clear(); Pending.clear(); CheckPending = false; NextSUs.clear(); - HazardRec = 0; CurrCycle = 0; - IssueCount = 0; + CurrMOps = 0; MinReadyCycle = UINT_MAX; ExpectedLatency = 0; - ResourceCounts.resize(1); - assert(!ResourceCounts[0] && "nonzero count for bad resource"); - CritResIdx = 0; + DependentLatency = 0; + RetiredMOps = 0; + MaxExecutedResCount = 0; + ZoneCritResIdx = 0; IsResourceLimited = false; - ExpectedCount = 0; #ifndef NDEBUG - MaxMinLatency = 0; + MaxObservedLatency = 0; #endif // Reserve a zero-count for invalid CritResIdx. - ResourceCounts.resize(1); + ExecutedResCounts.resize(1); + assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); } /// Pending queues extend the ready queues with the same ID and the /// PendingFlag set. 
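    // How the queue IDs work out, given the TopQID/BotQID/LogMaxQID enum
    // below: the shift moves pending queues out of the NodeQueueId range.
    //   Top.Available: ID = TopQID = 1     Top.Pending: ID = 1 << 2 = 4
    //   Bot.Available: ID = BotQID = 2     Bot.Pending: ID = 2 << 2 = 8
    // The low two bits therefore encode top/bottom availability (the
    // 0 none / 1 top / 2 bot / 3 both legend below), and the shifted
    // values mark the pending variants without colliding with it.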
SchedBoundary(unsigned ID, const Twine &Name): DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), - Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), + Pending(ID << GenericScheduler::LogMaxQID, Name+".P"), HazardRec(0) { reset(); } @@ -1300,28 +1541,63 @@ public: SchedRemainder *rem); bool isTop() const { - return Available.getID() == ConvergingScheduler::TopQID; + return Available.getID() == GenericScheduler::TopQID; + } + +#ifndef NDEBUG + const char *getResourceName(unsigned PIdx) { + if (!PIdx) + return "MOps"; + return SchedModel->getProcResource(PIdx)->Name; + } +#endif + + /// Get the number of latency cycles "covered" by the scheduled + /// instructions. This is the larger of the critical path within the zone + /// and the number of cycles required to issue the instructions. + unsigned getScheduledLatency() const { + return std::max(ExpectedLatency, CurrCycle); } unsigned getUnscheduledLatency(SUnit *SU) const { - if (isTop()) - return SU->getHeight(); - return SU->getDepth() + SU->Latency; + return isTop() ? SU->getHeight() : SU->getDepth(); + } + + unsigned getResourceCount(unsigned ResIdx) const { + return ExecutedResCounts[ResIdx]; } + /// Get the scaled count of scheduled micro-ops and resources, including + /// executed resources. unsigned getCriticalCount() const { - return ResourceCounts[CritResIdx]; + if (!ZoneCritResIdx) + return RetiredMOps * SchedModel->getMicroOpFactor(); + return getResourceCount(ZoneCritResIdx); + } + + /// Get a scaled count for the minimum execution time of the scheduled + /// micro-ops that are ready to execute by getExecutedCount. Notice the + /// feedback loop. + unsigned getExecutedCount() const { + return std::max(CurrCycle * SchedModel->getLatencyFactor(), + MaxExecutedResCount); } bool checkHazard(SUnit *SU); - void setLatencyPolicy(CandPolicy &Policy); + unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); + + unsigned getOtherResourceCount(unsigned &OtherCritIdx); + + void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone); void releaseNode(SUnit *SU, unsigned ReadyCycle); - void bumpCycle(); + void bumpCycle(unsigned NextCycle); - void countResource(unsigned PIdx, unsigned Cycles); + void incExecutedResources(unsigned PIdx, unsigned Count); + + unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); void bumpNode(SUnit *SU); @@ -1330,9 +1606,14 @@ public: void removeReady(SUnit *SU); SUnit *pickOnlyChoice(); + +#ifndef NDEBUG + void dumpScheduledState(); +#endif }; private: + const MachineSchedContext *Context; ScheduleDAGMI *DAG; const TargetSchedModel *SchedModel; const TargetRegisterInfo *TRI; @@ -1342,6 +1623,7 @@ private: SchedBoundary Top; SchedBoundary Bot; + MachineSchedPolicy RegionPolicy; public: /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) enum { @@ -1350,8 +1632,15 @@ public: LogMaxQID = 2 }; - ConvergingScheduler(): - DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + GenericScheduler(const MachineSchedContext *C): + Context(C), DAG(0), SchedModel(0), TRI(0), + Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + + virtual void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs); + + bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; } virtual void initialize(ScheduleDAGMI *dag); @@ -1366,14 +1655,7 @@ public: virtual void registerRoots(); protected: - void balanceZones( - ConvergingScheduler::SchedBoundary &CriticalZone, - ConvergingScheduler::SchedCandidate 
&CriticalCand, - ConvergingScheduler::SchedBoundary &OppositeZone, - ConvergingScheduler::SchedCandidate &OppositeCand); - - void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, - ConvergingScheduler::SchedCandidate &BotCand); + void checkAcyclicLatency(); void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, @@ -1395,7 +1677,7 @@ protected: }; } // namespace -void ConvergingScheduler::SchedRemainder:: +void GenericScheduler::SchedRemainder:: init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { reset(); if (!SchedModel->hasInstrSchedModel()) @@ -1404,7 +1686,8 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { for (std::vector<SUnit>::iterator I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); - RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); + RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) + * SchedModel->getMicroOpFactor(); for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { @@ -1413,26 +1696,61 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { RemainingCounts[PIdx] += (Factor * PI->Cycles); } } - for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds(); - PIdx != PEnd; ++PIdx) { - if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx]) - >= (int)SchedModel->getLatencyFactor()) { - CritResIdx = PIdx; - } - } } -void ConvergingScheduler::SchedBoundary:: +void GenericScheduler::SchedBoundary:: init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { reset(); DAG = dag; SchedModel = smodel; Rem = rem; if (SchedModel->hasInstrSchedModel()) - ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); + ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); +} + +/// Initialize the per-region scheduling policy. +void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + const TargetMachine &TM = Context->MF->getTarget(); + + // Avoid setting up the register pressure tracker for small regions to save + // compile time. As a rough heuristic, only track pressure when the number of + // schedulable instructions exceeds half the integer register file. + unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( + TM.getTargetLowering()->getRegClassFor(MVT::i32)); + + RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); + + // For generic targets, we default to bottom-up, because it's simpler and more + // compile-time optimizations have been implemented in that direction. + RegionPolicy.OnlyBottomUp = true; + + // Allow the subtarget to override default policy. + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); + + // After subtarget overrides, apply command line options. + if (!EnableRegPressure) + RegionPolicy.ShouldTrackPressure = false; + + // Check -misched-topdown/bottomup can force or unforce scheduling direction. + // e.g. -misched-bottomup=false allows scheduling in both directions. 
+ assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + if (ForceBottomUp.getNumOccurrences() > 0) { + RegionPolicy.OnlyBottomUp = ForceBottomUp; + if (RegionPolicy.OnlyBottomUp) + RegionPolicy.OnlyTopDown = false; + } + if (ForceTopDown.getNumOccurrences() > 0) { + RegionPolicy.OnlyTopDown = ForceTopDown; + if (RegionPolicy.OnlyTopDown) + RegionPolicy.OnlyBottomUp = false; + } } -void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { +void GenericScheduler::initialize(ScheduleDAGMI *dag) { DAG = dag; SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; @@ -1447,31 +1765,36 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { // are disabled, then these HazardRecs will be disabled. const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); const TargetMachine &TM = DAG->MF.getTarget(); - Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); + if (!Top.HazardRec) { + Top.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } + if (!Bot.HazardRec) { + Bot.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } } -void ConvergingScheduler::releaseTopNode(SUnit *SU) { +void GenericScheduler::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + if (I->isWeak()) + continue; unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned Latency = I->getLatency(); #ifndef NDEBUG - Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); + Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency); #endif - if (SU->TopReadyCycle < PredReadyCycle + MinLatency) - SU->TopReadyCycle = PredReadyCycle + MinLatency; + if (SU->TopReadyCycle < PredReadyCycle + Latency) + SU->TopReadyCycle = PredReadyCycle + Latency; } Top.releaseNode(SU, SU->TopReadyCycle); } -void ConvergingScheduler::releaseBottomNode(SUnit *SU) { +void GenericScheduler::releaseBottomNode(SUnit *SU) { if (SU->isScheduled) return; @@ -1482,18 +1805,56 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { if (I->isWeak()) continue; unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned Latency = I->getLatency(); #ifndef NDEBUG - Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); + Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency); #endif - if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) - SU->BotReadyCycle = SuccReadyCycle + MinLatency; + if (SU->BotReadyCycle < SuccReadyCycle + Latency) + SU->BotReadyCycle = SuccReadyCycle + Latency; } Bot.releaseNode(SU, SU->BotReadyCycle); } -void ConvergingScheduler::registerRoots() { +/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic +/// critical path by more cycles than it takes to drain the instruction buffer. +/// We estimate an upper bound on in-flight instructions as: +/// +/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) +/// InFlightIterations = AcyclicPath / CyclesPerIteration +/// InFlightResources = InFlightIterations * LoopResources +/// +/// TODO: Check execution resources in addition to IssueCount. 
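///
/// A hypothetical worked example (made-up numbers, assuming a latency factor
/// and micro-op factor of 1): for a loop body with CyclicCritPath = 4 cycles,
/// CriticalPath = 40 cycles, and RemIssueCount = 8 micro-ops, on a core with
/// MicroOpBufferSize = 32, the code below computes IterCount = max(4, 8) = 8,
/// InFlightCount = ceil((40 * 8) / 8) = 40 micro-ops, and BufferLimit = 32
/// micro-ops. Since 40 > 32, the buffer cannot cover the acyclic path and the
/// region is marked IsAcyclicLatencyLimited.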
+void GenericScheduler::checkAcyclicLatency() { + if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) + return; + + // Scaled number of cycles per loop iteration. + unsigned IterCount = + std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), + Rem.RemIssueCount); + // Scaled acyclic critical path. + unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); + // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop + unsigned InFlightCount = + (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; + unsigned BufferLimit = + SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); + + Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; + + DEBUG(dbgs() << "IssueCycles=" + << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " + << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() + << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount + << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() + << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; + if (Rem.IsAcyclicLatencyLimited) + dbgs() << " ACYCLIC LATENCY LIMIT\n"); +} + +void GenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); + // Some roots may not feed into ExitSU. Check all of them in case. for (std::vector<SUnit*>::const_iterator I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { @@ -1501,6 +1862,11 @@ void ConvergingScheduler::registerRoots() { Rem.CriticalPath = (*I)->getDepth(); } DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + + if (EnableCyclicPath) { + Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); + checkAcyclicLatency(); + } } /// Does this SU have a hazard within the current instruction group. @@ -1516,12 +1882,12 @@ void ConvergingScheduler::registerRoots() { /// can dispatch per cycle. /// /// TODO: Also check whether the SU must start a new group. -bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { +bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) { if (HazardRec->isEnabled()) return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); - if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) { + if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; @@ -1529,45 +1895,125 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { return false; } -/// Compute the remaining latency to determine whether ILP should be increased. -void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { - // FIXME: compile time. In all, we visit four queues here one we should only - // need to visit the one that was last popped if we cache the result. +// Find the unscheduled node in ReadySUs with the highest latency. 
+unsigned GenericScheduler::SchedBoundary:: +findMaxLatency(ArrayRef<SUnit*> ReadySUs) { + SUnit *LateSU = 0; unsigned RemLatency = 0; - for (ReadyQueue::iterator I = Available.begin(), E = Available.end(); + for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); I != E; ++I) { unsigned L = getUnscheduledLatency(*I); - DEBUG(dbgs() << " " << Available.getName() - << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n'); - if (L > RemLatency) + if (L > RemLatency) { RemLatency = L; + LateSU = *I; + } } - for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end(); - I != E; ++I) { - unsigned L = getUnscheduledLatency(*I); - if (L > RemLatency) - RemLatency = L; + if (LateSU) { + DEBUG(dbgs() << Available.getName() << " RemLatency SU(" + << LateSU->NodeNum << ") " << RemLatency << "c\n"); } - unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); - DEBUG(dbgs() << " " << Available.getName() - << " ExpectedLatency " << ExpectedLatency - << " CP Limit " << CriticalPathLimit << '\n'); - if (RemLatency + ExpectedLatency >= CriticalPathLimit - && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { - Policy.ReduceLatency = true; - DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n'); + return RemLatency; +} + +// Count resources in this zone and the remaining unscheduled +// instructions. Return the max count, scaled. Set OtherCritIdx to the critical +// resource index, or zero if the zone is issue limited. +unsigned GenericScheduler::SchedBoundary:: +getOtherResourceCount(unsigned &OtherCritIdx) { + OtherCritIdx = 0; + if (!SchedModel->hasInstrSchedModel()) + return 0; + + unsigned OtherCritCount = Rem->RemIssueCount + + (RetiredMOps * SchedModel->getMicroOpFactor()); + DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " + << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); + for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); + PIdx != PEnd; ++PIdx) { + unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; + if (OtherCount > OtherCritCount) { + OtherCritCount = OtherCount; + OtherCritIdx = PIdx; + } + } + if (OtherCritIdx) { + DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " + << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) + << " " << getResourceName(OtherCritIdx) << "\n"); } + return OtherCritCount; } -void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, - unsigned ReadyCycle) { +/// Set the CandPolicy for this zone given the current resources and latencies +/// inside and outside the zone. +void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy, + SchedBoundary &OtherZone) { + // Now that potential stalls have been considered, apply preemptive heuristics + // based on the total latency and resources inside and outside this + // zone. + + // Compute remaining latency. We need this both to determine whether the + // overall schedule has become latency-limited and whether the instructions + // outside this zone are resource or latency limited. + // + // The "dependent" latency is updated incrementally during scheduling as the + // max height/depth of scheduled nodes minus the cycles since it was + // scheduled: + // DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone + // + // The "independent" latency is the max ready queue depth: + // ILat = max N.depth for N in Available|Pending + // + // RemainingLatency is the greater of independent and dependent latency. 
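  //
  // A hypothetical example (made-up numbers): a scheduled node of depth 10
  // that became ready at cycle 2 contributes DLat = 10 - (CurrCycle - 2),
  // which is 7 at CurrCycle = 5. If the deepest node still in Available or
  // Pending has depth 9, then ILat = 9 and RemainingLatency = max(7, 9) = 9.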
+ unsigned RemLatency = DependentLatency; + RemLatency = std::max(RemLatency, findMaxLatency(Available.elements())); + RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements())); + + // Compute the critical resource outside the zone. + unsigned OtherCritIdx; + unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx); + + bool OtherResLimited = false; + if (SchedModel->hasInstrSchedModel()) { + unsigned LFactor = SchedModel->getLatencyFactor(); + OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; + } + if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) { + Policy.ReduceLatency |= true; + DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency " + << RemLatency << " + " << CurrCycle << "c > CritPath " + << Rem->CriticalPath << "\n"); + } + // If the same resource is limiting inside and outside the zone, do nothing. + if (ZoneCritResIdx == OtherCritIdx) + return; + DEBUG( + if (IsResourceLimited) { + dbgs() << " " << Available.getName() << " ResourceLimited: " + << getResourceName(ZoneCritResIdx) << "\n"; + } + if (OtherResLimited) + dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n"; + if (!IsResourceLimited && !OtherResLimited) + dbgs() << " Latency limited both directions.\n"); + + if (IsResourceLimited && !Policy.ReduceResIdx) + Policy.ReduceResIdx = ZoneCritResIdx; + + if (OtherResLimited) + Policy.DemandResIdx = OtherCritIdx; +} + +void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU, + unsigned ReadyCycle) { if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; // Check for interlocks first. For the purpose of other heuristics, an // instruction that cannot issue appears as if it's not in the ReadyQueue. - if (ReadyCycle > CurrCycle || checkHazard(SU)) + bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; + if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) Pending.push(SU); else Available.push(SU); @@ -1577,16 +2023,21 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, } /// Move the boundary of scheduled code by one cycle. -void ConvergingScheduler::SchedBoundary::bumpCycle() { - unsigned Width = SchedModel->getIssueWidth(); - IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; - - unsigned NextCycle = CurrCycle + 1; - assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); - if (MinReadyCycle > NextCycle) { - IssueCount = 0; - NextCycle = MinReadyCycle; - } +void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { + if (SchedModel->getMicroOpBufferSize() == 0) { + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + if (MinReadyCycle > NextCycle) + NextCycle = MinReadyCycle; + } + // Update the current micro-ops, which will issue in the next cycle. + unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); + CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; + + // Decrement DependentLatency based on the next cycle. + if ((NextCycle - CurrCycle) > DependentLatency) + DependentLatency = 0; + else + DependentLatency -= (NextCycle - CurrCycle); if (!HazardRec->isEnabled()) { // Bypass HazardRec virtual calls. 
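A minimal standalone sketch (not from the FreeBSD/LLVM sources; the helper name and the example issue width of 4 are illustrative assumptions) of the micro-op draining that the bumpCycle code above performs:

// Illustrative only: mirrors the DecMOps/CurrMOps update in bumpCycle above.
unsigned drainMOps(unsigned CurrMOps, unsigned IssueWidth,
                   unsigned CurrCycle, unsigned NextCycle) {
  // Already-issued micro-ops retire at IssueWidth per cycle advanced.
  unsigned DecMOps = IssueWidth * (NextCycle - CurrCycle);
  return (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
}
// drainMOps(6, 4, 10, 11) == 2: advancing one cycle drains four of the six
// in-flight micro-ops; advancing two or more cycles clamps the count to zero.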
@@ -1602,38 +2053,54 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() { } } CheckPending = true; - IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); + unsigned LFactor = SchedModel->getLatencyFactor(); + IsResourceLimited = + (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) + > (int)LFactor; + + DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); +} - DEBUG(dbgs() << " " << Available.getName() - << " Cycle: " << CurrCycle << '\n'); +void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx, + unsigned Count) { + ExecutedResCounts[PIdx] += Count; + if (ExecutedResCounts[PIdx] > MaxExecutedResCount) + MaxExecutedResCount = ExecutedResCounts[PIdx]; } /// Add the given processor resource to this scheduled zone. -void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, - unsigned Cycles) { +/// +/// \param Cycles indicates the number of consecutive (non-pipelined) cycles +/// during which this resource is consumed. +/// +/// \return the next cycle at which the instruction may execute without +/// oversubscribing resources. +unsigned GenericScheduler::SchedBoundary:: +countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); - DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name - << " +(" << Cycles << "x" << Factor - << ") / " << SchedModel->getLatencyFactor() << '\n'); - unsigned Count = Factor * Cycles; - ResourceCounts[PIdx] += Count; + DEBUG(dbgs() << " " << getResourceName(PIdx) + << " +" << Cycles << "x" << Factor << "u\n"); + + // Update Executed resources counts. + incExecutedResources(PIdx, Count); assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); Rem->RemainingCounts[PIdx] -= Count; - // Check if this resource exceeds the current critical resource by a full - // cycle. If so, it becomes the critical resource. - if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) - >= (int)SchedModel->getLatencyFactor()) { - CritResIdx = PIdx; + // Check if this resource exceeds the current critical resource. If so, it + // becomes the critical resource. + if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { + ZoneCritResIdx = PIdx; DEBUG(dbgs() << " *** Critical resource " - << SchedModel->getProcResource(PIdx)->Name << " x" - << ResourceCounts[PIdx] << '\n'); + << getResourceName(PIdx) << ": " + << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); } + // TODO: We don't yet model reserved resources. It's not hard though. + return CurrCycle; } /// Move the boundary of scheduled code by one SUnit. -void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { +void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { // Update the reservation table. if (HazardRec->isEnabled()) { if (!isTop() && SU->isCall) { @@ -1643,51 +2110,108 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { } HazardRec->EmitInstruction(SU); } + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); + CurrMOps += IncMOps; + // checkHazard prevents scheduling multiple instructions per cycle that exceed + // issue width. However, we commonly reach the maximum. In this case + // opportunistically bump the cycle to avoid uselessly checking everything in + // the readyQ. Furthermore, a single instruction may produce more than one + // cycle's worth of micro-ops. 
+ // + // TODO: Also check if this SU must end a dispatch group. + unsigned NextCycle = CurrCycle; + if (CurrMOps >= SchedModel->getIssueWidth()) { + ++NextCycle; + DEBUG(dbgs() << " *** Max MOps " << CurrMOps + << " at cycle " << CurrCycle << '\n'); + } + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); + DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); + + switch (SchedModel->getMicroOpBufferSize()) { + case 0: + assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + break; + case 1: + if (ReadyCycle > NextCycle) { + NextCycle = ReadyCycle; + DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); + } + break; + default: + // We don't currently model the OOO reorder buffer, so consider all + // scheduled MOps to be "retired". + break; + } + RetiredMOps += IncMOps; + // Update resource counts and critical resource. if (SchedModel->hasInstrSchedModel()) { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); - Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC); + unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); + assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); + Rem->RemIssueCount -= DecRemIssue; + if (ZoneCritResIdx) { + // Scale scheduled micro-ops for comparing with the critical resource. + unsigned ScaledMOps = + RetiredMOps * SchedModel->getMicroOpFactor(); + + // If scaled micro-ops are now more than the previous critical resource by + // a full cycle, then micro-ops issue becomes critical. + if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) + >= (int)SchedModel->getLatencyFactor()) { + ZoneCritResIdx = 0; + DEBUG(dbgs() << " *** Critical resource NumMicroOps: " + << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); + } + } for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - countResource(PI->ProcResourceIdx, PI->Cycles); + unsigned RCycle = + countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle); + if (RCycle > NextCycle) + NextCycle = RCycle; } } - if (isTop()) { - if (SU->getDepth() > ExpectedLatency) - ExpectedLatency = SU->getDepth(); + // Update ExpectedLatency and DependentLatency. + unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; + unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; + if (SU->getDepth() > TopLatency) { + TopLatency = SU->getDepth(); + DEBUG(dbgs() << " " << Available.getName() + << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); } - else { - if (SU->getHeight() > ExpectedLatency) - ExpectedLatency = SU->getHeight(); + if (SU->getHeight() > BotLatency) { + BotLatency = SU->getHeight(); + DEBUG(dbgs() << " " << Available.getName() + << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); } - - IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); - - // Check the instruction group dispatch limit. - // TODO: Check if this SU must end a dispatch group. - IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); - - // checkHazard prevents scheduling multiple instructions per cycle that exceed - // issue width. However, we commonly reach the maximum. In this case - // opportunistically bump the cycle to avoid uselessly checking everything in - // the readyQ. Furthermore, a single instruction may produce more than one - // cycle's worth of micro-ops. 
- if (IssueCount >= SchedModel->getIssueWidth()) { - DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n'); - bumpCycle(); + // If we stall for any reason, bump the cycle. + if (NextCycle > CurrCycle) { + bumpCycle(NextCycle); + } + else { + // After updating ZoneCritResIdx and ExpectedLatency, check if we're + // resource limited. If a stall occurred, bumpCycle does this. + unsigned LFactor = SchedModel->getLatencyFactor(); + IsResourceLimited = + (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) + > (int)LFactor; } + DEBUG(dumpScheduledState()); } /// Release pending ready nodes into the available queue. This makes them /// visible to heuristics. -void ConvergingScheduler::SchedBoundary::releasePending() { +void GenericScheduler::SchedBoundary::releasePending() { // If the available queue is empty, it is safe to reset MinReadyCycle. if (Available.empty()) MinReadyCycle = UINT_MAX; // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. + bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; for (unsigned i = 0, e = Pending.size(); i != e; ++i) { SUnit *SU = *(Pending.begin()+i); unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; @@ -1695,7 +2219,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() { if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; - if (ReadyCycle > CurrCycle) + if (!IsBuffered && ReadyCycle > CurrCycle) continue; if (checkHazard(SU)) @@ -1710,7 +2234,7 @@ } /// Remove SU from the ready set for this boundary. -void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) { +void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) { if (Available.isInQueue(SU)) Available.remove(Available.find(SU)); else { @@ -1722,11 +2246,11 @@ /// If this queue only has one ready candidate, return it. As a side effect, /// defer any nodes that now hit a hazard, and advance the cycle until at least /// one node is ready. If multiple instructions are ready, return NULL. -SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { +SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); - if (IssueCount > 0) { + if (CurrMOps > 0) { // Defer any ready instrs that now have a hazard. for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { if (checkHazard(*I)) { @@ -1738,9 +2262,9 @@ } } for (unsigned i = 0; Available.empty(); ++i) { - assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) && "permanent hazard"); (void)i; - bumpCycle(); + bumpCycle(CurrCycle + 1); releasePending(); } if (Available.size() == 1) @@ -1748,106 +2272,33 @@ return NULL; } -/// Record the candidate policy for opposite zones with different critical -/// resources. -/// -/// If the CriticalZone is latency limited, don't force a policy for the -/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed. 
-void ConvergingScheduler::balanceZones( - ConvergingScheduler::SchedBoundary &CriticalZone, - ConvergingScheduler::SchedCandidate &CriticalCand, - ConvergingScheduler::SchedBoundary &OppositeZone, - ConvergingScheduler::SchedCandidate &OppositeCand) { - - if (!CriticalZone.IsResourceLimited) - return; - assert(SchedModel->hasInstrSchedModel() && "required schedmodel"); - - SchedRemainder *Rem = CriticalZone.Rem; - - // If the critical zone is overconsuming a resource relative to the - // remainder, try to reduce it. - unsigned RemainingCritCount = - Rem->RemainingCounts[CriticalZone.CritResIdx]; - if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount) - > (int)SchedModel->getLatencyFactor()) { - CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << " Balance " << CriticalZone.Available.getName() - << " reduce " - << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name - << '\n'); - } - // If the other zone is underconsuming a resource relative to the full zone, - // try to increase it. - unsigned OppositeCount = - OppositeZone.ResourceCounts[CriticalZone.CritResIdx]; - if ((int)(OppositeZone.ExpectedCount - OppositeCount) - > (int)SchedModel->getLatencyFactor()) { - OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName() - << " demand " - << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name - << '\n'); - } -} - -/// Determine if the scheduled zones exceed resource limits or critical path and -/// set each candidate's ReduceHeight policy accordingly. -void ConvergingScheduler::checkResourceLimits( - ConvergingScheduler::SchedCandidate &TopCand, - ConvergingScheduler::SchedCandidate &BotCand) { - - // Set ReduceLatency to true if needed. - Bot.setLatencyPolicy(BotCand.Policy); - Top.setLatencyPolicy(TopCand.Policy); - - // Handle resource-limited regions. - if (Top.IsResourceLimited && Bot.IsResourceLimited - && Top.CritResIdx == Bot.CritResIdx) { - // If the scheduled critical resource in both zones is no longer the - // critical remaining resource, attempt to reduce resource height both ways. - if (Top.CritResIdx != Rem.CritResIdx) { - TopCand.Policy.ReduceResIdx = Top.CritResIdx; - BotCand.Policy.ReduceResIdx = Bot.CritResIdx; - DEBUG(dbgs() << " Reduce scheduled " - << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n'); - } - return; - } - // Handle latency-limited regions. - if (!Top.IsResourceLimited && !Bot.IsResourceLimited) { - // If the total scheduled expected latency exceeds the region's critical - // path then reduce latency both ways. - // - // Just because a zone is not resource limited does not mean it is latency - // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle - // to exceed expected latency. - if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath) - && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) { - TopCand.Policy.ReduceLatency = true; - BotCand.Policy.ReduceLatency = true; - DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency - << " + " << Bot.ExpectedLatency << '\n'); - } - return; +#ifndef NDEBUG +// This is useful information to dump after bumpNode. +// Note that the Queue contents are more useful before pickNodeFromQueue. 
+void GenericScheduler::SchedBoundary::dumpScheduledState() { + unsigned ResFactor; + unsigned ResCount; + if (ZoneCritResIdx) { + ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); + ResCount = getResourceCount(ZoneCritResIdx); } - // The critical resource is different in each zone, so request balancing. - - // Compute the cost of each zone. - Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle); - Top.ExpectedCount = std::max( - Top.getCriticalCount(), - Top.ExpectedCount * SchedModel->getLatencyFactor()); - Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle); - Bot.ExpectedCount = std::max( - Bot.getCriticalCount(), - Bot.ExpectedCount * SchedModel->getLatencyFactor()); - - balanceZones(Top, TopCand, Bot, BotCand); - balanceZones(Bot, BotCand, Top, TopCand); + else { + ResFactor = SchedModel->getMicroOpFactor(); + ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); + } + unsigned LFactor = SchedModel->getLatencyFactor(); + dbgs() << Available.getName() << " @" << CurrCycle << "c\n" + << " Retired: " << RetiredMOps; + dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; + dbgs() << "\n Critical: " << ResCount / LFactor << "c, " + << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx) + << "\n ExpectedLatency: " << ExpectedLatency << "c\n" + << (IsResourceLimited ? " - Resource" : " - Latency") + << " limited.\n"; } +#endif -void ConvergingScheduler::SchedCandidate:: +void GenericScheduler::SchedCandidate:: initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { if (!Policy.ReduceResIdx && !Policy.DemandResIdx) @@ -1864,11 +2315,12 @@ initResourceDelta(const ScheduleDAGMI *DAG, } } + /// Return true if this heuristic determines order. static bool tryLess(int TryVal, int CandVal, - ConvergingScheduler::SchedCandidate &TryCand, - ConvergingScheduler::SchedCandidate &Cand, - ConvergingScheduler::CandReason Reason) { + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { if (TryVal < CandVal) { TryCand.Reason = Reason; return true; @@ -1878,13 +2330,14 @@ static bool tryLess(int TryVal, int CandVal, Cand.Reason = Reason; return true; } + Cand.setRepeat(Reason); return false; } static bool tryGreater(int TryVal, int CandVal, - ConvergingScheduler::SchedCandidate &TryCand, - ConvergingScheduler::SchedCandidate &Cand, - ConvergingScheduler::CandReason Reason) { + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { if (TryVal > CandVal) { TryCand.Reason = Reason; return true; @@ -1894,9 +2347,34 @@ static bool tryGreater(int TryVal, int CandVal, Cand.Reason = Reason; return true; } + Cand.setRepeat(Reason); return false; } +static bool tryPressure(const PressureChange &TryP, + const PressureChange &CandP, + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { + int TryRank = TryP.getPSetOrMax(); + int CandRank = CandP.getPSetOrMax(); + // If both candidates affect the same set, go with the smallest increase. + if (TryRank == CandRank) { + return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, + Reason); + } + // If one candidate decreases and the other increases, go with it. + // Invalid candidates have UnitInc==0. 
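  //
  // A hypothetical example: if TryP is invalid (no pressure change), then
  // getPSetOrMax() returned INT_MAX above, so the final tryGreater call
  // prefers TryCand over a candidate that increases a real pressure set.
  // When both candidates decrease pressure, the swap below reverses the
  // ranks, so the candidate relieving the lower-numbered set wins.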
+ if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, + Reason)) { + return true; + } + // If the candidates are decreasing pressure, reverse priority. + if (TryP.getUnitInc() < 0) + std::swap(TryRank, CandRank); + return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); +} + static unsigned getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } @@ -1929,6 +2407,32 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) { return 0; } +static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::SchedBoundary &Zone) { + if (Zone.isTop()) { + if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericScheduler::TopDepthReduce)) + return true; + } + if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericScheduler::TopPathReduce)) + return true; + } + else { + if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericScheduler::BotHeightReduce)) + return true; + } + if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericScheduler::BotPathReduce)) + return true; + } + return false; +} + /// Apply a set of heuristics to a new candidate. Heuristics are currently /// hierarchical. This may be more efficient than a graduated cost model because /// we don't need to evaluate all aspects of the model for each node in the /// @@ -1940,16 +2444,44 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) { /// \param Zone describes the scheduled zone that we are extending. /// \param RPTracker describes reg pressure within the scheduled zone. /// \param TempTracker is a scratch pressure tracker to reuse in queries. -void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, +void GenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary &Zone, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker) { - // Always initialize TryCand's RPDelta. - TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta, - DAG->getRegionCriticalPSets(), - DAG->getRegPressure().MaxSetPressure); + if (DAG->isTrackingPressure()) { + // Always initialize TryCand's RPDelta. + if (Zone.isTop()) { + TempTracker.getMaxDownwardPressureDelta( + TryCand.SU->getInstr(), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + else { + if (VerifyScheduling) { + TempTracker.getMaxUpwardPressureDelta( + TryCand.SU->getInstr(), + &DAG->getPressureDiff(TryCand.SU), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + else { + RPTracker.getUpwardPressureDelta( + TryCand.SU->getInstr(), + DAG->getPressureDiff(TryCand.SU), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + } + } + DEBUG(if (TryCand.RPDelta.Excess.isValid()) + dbgs() << " SU(" << TryCand.SU->NodeNum << ") " + << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet()) + << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n"); // Initialize the candidate if needed. if (!Cand.isValid()) { @@ -1962,20 +2494,25 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, TryCand, Cand, PhysRegCopy)) return; - // Avoid exceeding the target's limit. 
- if (tryLess(TryCand.RPDelta.Excess.UnitIncrease, - Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess)) + // Avoid exceeding the target's limit. If signed PSetID is negative, it is + // invalid; convert it to INT_MAX to give it lowest priority. + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, + Cand.RPDelta.Excess, + TryCand, Cand, RegExcess)) return; - if (Cand.Reason == SingleExcess) - Cand.Reason = MultiPressure; // Avoid increasing the max critical pressure in the scheduled region. - if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease, - Cand.RPDelta.CriticalMax.UnitIncrease, - TryCand, Cand, SingleCritical)) + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, + Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical)) + return; + + // For loops that are acyclic path limited, aggressively schedule for latency. + // This can result in very long dependence chains scheduled in sequence, so + // once every cycle (when CurrMOps == 0), switch to normal heuristics. + if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps + && tryLatency(TryCand, Cand, Zone)) return; - if (Cand.Reason == SingleCritical) - Cand.Reason = MultiPressure; // Keep clustered nodes together to encourage downstream peephole // optimizations which may reduce resource requirements. @@ -1990,17 +2527,17 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; // Weak edges are for clustering and other constraints. - // - // Deferring TryCand here does not change Cand's reason. This is good in the - // sense that a bad candidate shouldn't affect a previous candidate's - // goodness, but bad in that it is assymetric and depends on queue order. - CandReason OrigReason = Cand.Reason; if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), getWeakLeft(Cand.SU, Zone.isTop()), TryCand, Cand, Weak)) { - Cand.Reason = OrigReason; return; } + // Avoid increasing the max pressure of the entire region. + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, + Cand.RPDelta.CurrentMax, + TryCand, Cand, RegMax)) + return; + // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, @@ -2012,41 +2549,15 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; // Avoid serializing long latency dependence chains. - if (Cand.Policy.ReduceLatency) { - if (Zone.isTop()) { - if (Cand.SU->getDepth() * SchedModel->getLatencyFactor() - > Zone.ExpectedCount) { - if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, TopDepthReduce)) - return; - } - if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, TopPathReduce)) - return; - } - else { - if (Cand.SU->getHeight() * SchedModel->getLatencyFactor() - > Zone.ExpectedCount) { - if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, BotHeightReduce)) - return; - } - if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, BotPathReduce)) - return; - } - } - - // Avoid increasing the max pressure of the entire region. - if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease, - Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax)) + // For acyclic path limited loops, latency was already checked above. 
+ if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited + && tryLatency(TryCand, Cand, Zone)) { return; - if (Cand.Reason == SingleMax) - Cand.Reason = MultiPressure; + } // Prefer immediate defs/users of the last scheduled instruction. This is a - // nice pressure avoidance strategy that also conserves the processor's - // register renaming resources and keeps the machine code readable. + // local pressure avoidance strategy that also makes the machine code + // readable. if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), TryCand, Cand, NextDefUse)) return; @@ -2058,49 +2569,17 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, } } -/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is -/// more desirable than RHS from scheduling standpoint. -static bool compareRPDelta(const RegPressureDelta &LHS, - const RegPressureDelta &RHS) { - // Compare each component of pressure in decreasing order of importance - // without checking if any are valid. Invalid PressureElements are assumed to - // have UnitIncrease==0, so are neutral. - - // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { - DEBUG(dbgs() << " RP excess top - bot: " - << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); - return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; - } - // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { - DEBUG(dbgs() << " RP critical top - bot: " - << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) - << '\n'); - return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; - } - // Avoid increasing the max pressure of the entire region. 
- if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { - DEBUG(dbgs() << " RP current top - bot: " - << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) - << '\n'); - return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; - } - return false; -} - #ifndef NDEBUG -const char *ConvergingScheduler::getReasonStr( - ConvergingScheduler::CandReason Reason) { +const char *GenericScheduler::getReasonStr( + GenericScheduler::CandReason Reason) { switch (Reason) { case NoCand: return "NOCAND "; case PhysRegCopy: return "PREG-COPY"; - case SingleExcess: return "REG-EXCESS"; - case SingleCritical: return "REG-CRIT "; + case RegExcess: return "REG-EXCESS"; + case RegCritical: return "REG-CRIT "; case Cluster: return "CLUSTER "; case Weak: return "WEAK "; - case SingleMax: return "REG-MAX "; - case MultiPressure: return "REG-MULTI "; + case RegMax: return "REG-MAX "; case ResourceReduce: return "RES-REDUCE"; case ResourceDemand: return "RES-DEMAND"; case TopDepthReduce: return "TOP-DEPTH "; @@ -2113,20 +2592,20 @@ const char *ConvergingScheduler::getReasonStr( llvm_unreachable("Unknown reason!"); } -void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { - PressureElement P; +void GenericScheduler::traceCandidate(const SchedCandidate &Cand) { + PressureChange P; unsigned ResIdx = 0; unsigned Latency = 0; switch (Cand.Reason) { default: break; - case SingleExcess: + case RegExcess: P = Cand.RPDelta.Excess; break; - case SingleCritical: + case RegCritical: P = Cand.RPDelta.CriticalMax; break; - case SingleMax: + case RegMax: P = Cand.RPDelta.CurrentMax; break; case ResourceReduce: @@ -2150,8 +2629,8 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { } dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); if (P.isValid()) - dbgs() << " " << TRI->getRegPressureSetName(P.PSetID) - << ":" << P.UnitIncrease << " "; + dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) + << ":" << P.getUnitInc() << " "; else dbgs() << " "; if (ResIdx) @@ -2166,12 +2645,12 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { } #endif -/// Pick the best candidate from the top queue. +/// Pick the best candidate from the queue. /// /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during /// DAG building. To adjust for the current scheduling location we need to /// maintain the number of vreg uses remaining to be top-scheduled. -void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, +void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, const RegPressureTracker &RPTracker, SchedCandidate &Cand) { ReadyQueue &Q = Zone.Available; @@ -2196,30 +2675,31 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, } } -static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, +static void tracePick(const GenericScheduler::SchedCandidate &Cand, bool IsTop) { DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") - << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n'); + << GenericScheduler::getReasonStr(Cand.Reason) << '\n'); } /// Pick the best candidate node from either the top or bottom queue. -SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { +SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. This is most // efficient, but also provides the best heuristics for CriticalPSets. 
if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - DEBUG(dbgs() << "Pick Top NOCAND\n"); + DEBUG(dbgs() << "Pick Bot NOCAND\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - DEBUG(dbgs() << "Pick Bot NOCAND\n"); + DEBUG(dbgs() << "Pick Top NOCAND\n"); return SU; } CandPolicy NoPolicy; SchedCandidate BotCand(NoPolicy); SchedCandidate TopCand(NoPolicy); - checkResourceLimits(TopCand, BotCand); + Bot.setPolicy(BotCand.Policy, Top); + Top.setPolicy(TopCand.Policy, Bot); // Prefer bottom scheduling when heuristics are silent. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); @@ -2232,7 +2712,10 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // affects picking from either Q. If scheduling in one direction must // increase pressure for one of the excess PSets, then schedule in that // direction first to provide more freedom in the other direction. - if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) { + if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess)) + || (BotCand.Reason == RegCritical + && !BotCand.isRepeat(RegCritical))) + { IsTopNode = false; tracePick(BotCand, IsTopNode); return BotCand.SU; @@ -2241,37 +2724,20 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); assert(TopCand.Reason != NoCand && "failed to find the first candidate"); - // If either Q has a single candidate that minimizes pressure above the - // original region's pressure pick it. - if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) { - if (TopCand.Reason < BotCand.Reason) { - IsTopNode = true; - tracePick(TopCand, IsTopNode); - return TopCand.SU; - } - IsTopNode = false; - tracePick(BotCand, IsTopNode); - return BotCand.SU; - } - // Check for a salient pressure difference and pick the best from either side. - if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) { - IsTopNode = true; - tracePick(TopCand, IsTopNode); - return TopCand.SU; - } - // Otherwise prefer the bottom candidate, in node order if all else failed. + // Choose the queue with the most important (lowest enum) reason. if (TopCand.Reason < BotCand.Reason) { IsTopNode = true; tracePick(TopCand, IsTopNode); return TopCand.SU; } + // Otherwise prefer the bottom candidate, in node order if all else failed. IsTopNode = false; tracePick(BotCand, IsTopNode); return BotCand.SU; } /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 
-SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { +SUnit *GenericScheduler::pickNode(bool &IsTopNode) { if (DAG->top() == DAG->bottom()) { assert(Top.Available.empty() && Top.Pending.empty() && Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); @@ -2279,24 +2745,26 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { } SUnit *SU; do { - if (ForceTopDown) { + if (RegionPolicy.OnlyTopDown) { SU = Top.pickOnlyChoice(); if (!SU) { CandPolicy NoPolicy; SchedCandidate TopCand(NoPolicy); pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); - assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + tracePick(TopCand, true); SU = TopCand.SU; } IsTopNode = true; } - else if (ForceBottomUp) { + else if (RegionPolicy.OnlyBottomUp) { SU = Bot.pickOnlyChoice(); if (!SU) { CandPolicy NoPolicy; SchedCandidate BotCand(NoPolicy); pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); - assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + assert(BotCand.Reason != NoCand && "failed to find a candidate"); + tracePick(BotCand, false); SU = BotCand.SU; } IsTopNode = false; @@ -2315,7 +2783,7 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { return SU; } -void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { +void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { MachineBasicBlock::iterator InsertPos = SU->getInstr(); if (!isTop) @@ -2346,15 +2814,15 @@ void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { /// /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling /// them here. See comments in biasPhysRegCopy. -void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { +void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { - SU->TopReadyCycle = Top.CurrCycle; + SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle); Top.bumpNode(SU); if (SU->hasPhysRegUses) reschedulePhysRegCopies(SU, true); } else { - SU->BotReadyCycle = Bot.CurrCycle; + SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle); Bot.bumpNode(SU); if (SU->hasPhysRegDefs) reschedulePhysRegCopies(SU, false); @@ -2363,26 +2831,23 @@ void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); - ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler()); +static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) { + ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C)); // Register DAG post-processors. // // FIXME: extend the mutation API to allow earlier mutations to instantiate // data and pass it to later mutations. Have a single mutation that gathers // the interesting nodes in one pass. 
- if (EnableCopyConstrain) - DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI)); - if (EnableLoadCluster) + DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI)); + if (EnableLoadCluster && DAG->TII->enableClusterLoads()) DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI)); if (EnableMacroFusion) DAG->addMutation(new MacroFusion(DAG->TII)); return DAG; } static MachineSchedRegistry -ConvergingSchedRegistry("converge", "Standard converging scheduler.", - createConvergingSched); +GenericSchedRegistry("converge", "Standard converging scheduler.", + createGenericSched); //===----------------------------------------------------------------------===// // ILP Scheduler. Currently for experimental analysis of heuristics. @@ -2424,15 +2889,6 @@ struct ILPOrder { /// \brief Schedule based on the ILP metric. class ILPScheduler : public MachineSchedStrategy { - /// In case all subtrees are eventually connected to a common root through - /// data dependence (e.g. reduction), place an upper limit on their size. - /// - /// FIXME: A subtree limit is generally good, but in the situation commented - /// above, where multiple similar subtrees feed a common root, we should - /// only split at a point where the resulting subtrees will be balanced. - /// (a motivating test case must be found). - static const unsigned SubtreeLimit = 16; - ScheduleDAGMI *DAG; ILPOrder Cmp; @@ -2616,7 +3072,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { } static bool isNodeHidden(const SUnit *Node) { - return (Node->NumPreds > 10 || Node->NumSuccs > 10); + return (Node->Preds.size() > 10 || Node->Succs.size() > 10); } static bool hasNodeAddressLabel(const SUnit *Node, @@ -2639,7 +3095,11 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { std::string Str; raw_string_ostream SS(Str); - SS << "SU(" << SU->NodeNum << ')'; + const SchedDFSResult *DFS = + static_cast<const ScheduleDAGMI*>(G)->getDFSResult(); + SS << "SU:" << SU->NodeNum; + if (DFS) + SS << " I:" << DFS->getNumInstrs(SU); return SS.str(); } static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 4dafbe5..105d7c2 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -308,12 +308,29 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, // to be sunk then it's probably worth it. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; + if (!MO.isReg() || !MO.isUse()) + continue; unsigned Reg = MO.getReg(); - if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Reg == 0) continue; - if (MRI->hasOneNonDBGUse(Reg)) - return true; + + // We don't move live definitions of physical registers, + // so sinking their uses won't enable any opportunities. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + // If this instruction is the only user of a virtual register, + // check if breaking the edge will enable sinking + // both this instruction and the defining instruction. + if (MRI->hasOneNonDBGUse(Reg)) { + // If the definition resides in same MBB, + // claim it's likely we can sink these together. + // If definition resides elsewhere, we aren't + // blocking it from being sunk so don't break the edge. 
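      // A hypothetical illustration (made-up register): if %v1 is defined by
      // an instruction in this same block and MI is its only user, sinking
      // the pair together can pay for breaking the edge; if the def lives in
      // another block, we are not blocking it, so breaking the edge here
      // buys nothing.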
+ MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (DefMI->getParent() == MI->getParent()) + return true; + } } return false; @@ -394,7 +411,7 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) { /// collectDebugValues - Scan instructions following MI and collect any /// matching DBG_VALUEs. static void collectDebugValues(MachineInstr *MI, - SmallVector<MachineInstr *, 2> & DbgValues) { + SmallVectorImpl<MachineInstr *> &DbgValues) { DbgValues.clear(); if (!MI->getOperand(0).isReg()) return; @@ -537,8 +554,8 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // We give successors with smaller loop depth higher priority. SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); - for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(), - E = Succs.end(); SI != E; ++SI) { + for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(), + E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, @@ -615,9 +632,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo); - // If the block has multiple predecessors, this would introduce computation on - // a path that it doesn't already exist. We could split the critical edge, - // but for now we just punt. + // If the block has multiple predecessors, this is a critical edge. + // Decide if we can sink along it or need to break the edge. if (SuccToSinkTo->pred_size() > 1) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. @@ -697,7 +713,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { ++MachineBasicBlock::iterator(MI)); // Move debug values. - for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(), + for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { MachineInstr *DbgMI = *DBI; SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI, diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 00f702c..6aa3f67 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -853,8 +853,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) DepCycle += MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp, - /* FindMin = */ false); + .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp); Cycle = std::max(Cycle, DepCycle); } // Remember the instruction depth. @@ -902,8 +901,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, // We may not know the UseMI of this dependency, if it came from the // live-in list. SchedModel can handle a NULL UseMI. DepHeight += SchedModel - .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op, - /* FindMin = */ false); + .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op); } Height = std::max(Height, DepHeight); // This regunit is dead above MI. @@ -941,7 +939,7 @@ static bool pushDepHeight(const DataDep &Dep, // Adjust height by Dep.DefMI latency. 
if (!Dep.DefMI->isTransient()) UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp, false); + UseMI, Dep.UseOp); // Update Heights[DefMI] to be the maximum height seen. MIHeightMap::iterator I; @@ -1171,7 +1169,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) DepCycle += TE.MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false); + .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp); return DepCycle; } diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index 037043f..d61470c 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -212,6 +213,10 @@ namespace { const LiveInterval &LI); void report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI); + void report(const char *msg, const MachineFunction *MF, + const LiveRange &LR); + void report(const char *msg, const MachineBasicBlock *MBB, + const LiveRange &LR); void verifyInlineAsm(const MachineInstr *MI); @@ -224,9 +229,12 @@ namespace { void verifyLiveVariables(); void verifyLiveIntervals(); void verifyLiveInterval(const LiveInterval&); - void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); - void verifyLiveIntervalSegment(const LiveInterval&, - LiveInterval::const_iterator); + void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned); + void verifyLiveRangeSegment(const LiveRange&, + const LiveRange::const_iterator I, unsigned); + void verifyLiveRange(const LiveRange&, unsigned); + + void verifyStackFrame(); }; struct MachineVerifierPass : public MachineFunctionPass { @@ -268,8 +276,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { raw_ostream *OutFile = 0; if (OutFileName) { std::string ErrorInfo; - OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, - raw_fd_ostream::F_Append); + OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, sys::fs::F_Append); if (!ErrorInfo.empty()) { errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; exit(1); @@ -412,23 +419,25 @@ void MachineVerifier::report(const char *msg, void MachineVerifier::report(const char *msg, const MachineFunction *MF, const LiveInterval &LI) { report(msg, MF); - *OS << "- interval: "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - *OS << PrintReg(LI.reg, TRI); - else - *OS << PrintRegUnit(LI.reg, TRI); - *OS << ' ' << LI << '\n'; + *OS << "- interval: " << LI << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI) { report(msg, MBB); - *OS << "- interval: "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - *OS << PrintReg(LI.reg, TRI); - else - *OS << PrintRegUnit(LI.reg, TRI); - *OS << ' ' << LI << '\n'; + *OS << "- interval: " << LI << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, + const LiveRange &LR) { + report(msg, MBB); + *OS << "- liverange: " << LR << "\n"; +} + +void MachineVerifier::report(const char *msg, const MachineFunction *MF, + const LiveRange &LR) { + report(msg, MF); + *OS << "- liverange: " << LR << "\n"; } void 
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index 037043f..d61470c 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -25,6 +25,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -212,6 +213,10 @@ namespace {
                 const LiveInterval &LI);
     void report(const char *msg, const MachineBasicBlock *MBB,
                 const LiveInterval &LI);
+    void report(const char *msg, const MachineFunction *MF,
+                const LiveRange &LR);
+    void report(const char *msg, const MachineBasicBlock *MBB,
+                const LiveRange &LR);
 
     void verifyInlineAsm(const MachineInstr *MI);
@@ -224,9 +229,12 @@ namespace {
     void verifyLiveVariables();
     void verifyLiveIntervals();
     void verifyLiveInterval(const LiveInterval&);
-    void verifyLiveIntervalValue(const LiveInterval&, VNInfo*);
-    void verifyLiveIntervalSegment(const LiveInterval&,
-                                   LiveInterval::const_iterator);
+    void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned);
+    void verifyLiveRangeSegment(const LiveRange&,
+                                const LiveRange::const_iterator I, unsigned);
+    void verifyLiveRange(const LiveRange&, unsigned);
+
+    void verifyStackFrame();
   };
 
   struct MachineVerifierPass : public MachineFunctionPass {
@@ -268,8 +276,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
   raw_ostream *OutFile = 0;
   if (OutFileName) {
     std::string ErrorInfo;
-    OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
-                                 raw_fd_ostream::F_Append);
+    OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, sys::fs::F_Append);
     if (!ErrorInfo.empty()) {
       errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
       exit(1);
@@ -412,23 +419,25 @@ void MachineVerifier::report(const char *msg,
 void MachineVerifier::report(const char *msg, const MachineFunction *MF,
                              const LiveInterval &LI) {
   report(msg, MF);
-  *OS << "- interval: ";
-  if (TargetRegisterInfo::isVirtualRegister(LI.reg))
-    *OS << PrintReg(LI.reg, TRI);
-  else
-    *OS << PrintRegUnit(LI.reg, TRI);
-  *OS << ' ' << LI << '\n';
+  *OS << "- interval: " << LI << '\n';
 }
 
 void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
                              const LiveInterval &LI) {
   report(msg, MBB);
-  *OS << "- interval: ";
-  if (TargetRegisterInfo::isVirtualRegister(LI.reg))
-    *OS << PrintReg(LI.reg, TRI);
-  else
-    *OS << PrintRegUnit(LI.reg, TRI);
-  *OS << ' ' << LI << '\n';
+  *OS << "- interval: " << LI << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
+                             const LiveRange &LR) {
+  report(msg, MBB);
+  *OS << "- liverange: " << LR << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF,
+                             const LiveRange &LR) {
+  report(msg, MF);
+  *OS << "- liverange: " << LR << "\n";
 }
 
 void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
@@ -475,6 +484,8 @@ void MachineVerifier::visitMachineFunctionBefore() {
 
   // Check that the register use lists are sane.
   MRI->verifyUseLists();
+
+  verifyStackFrame();
 }
 
 // Does iterator point to a and b as the first two elements?
@@ -669,8 +680,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
       report("MBB live-in list contains non-physical register", MBB);
       continue;
     }
-    regsLive.insert(*I);
-    for (MCSubRegIterator SubRegs(*I, TRI); SubRegs.isValid(); ++SubRegs)
+    for (MCSubRegIterator SubRegs(*I, TRI, /*IncludeSelf=*/true);
+         SubRegs.isValid(); ++SubRegs)
       regsLive.insert(*SubRegs);
   }
   regsLiveInButUnused = regsLive;
@@ -679,8 +690,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
   assert(MFI && "Function has no frame info");
   BitVector PR = MFI->getPristineRegs(MBB);
   for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
-    regsLive.insert(I);
-    for (MCSubRegIterator SubRegs(I, TRI); SubRegs.isValid(); ++SubRegs)
+    for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true);
+         SubRegs.isValid(); ++SubRegs)
      regsLive.insert(*SubRegs);
   }
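The two live-in hunks above (and several later ones in this commit) replace the pattern "insert Reg, then loop over its sub-registers" with a single loop whose iterator also yields the register itself (IncludeSelf). A toy model of that iteration, not using LLVM APIs; the sub-register table is invented:

#include <cstdio>
#include <set>
#include <vector>

// Hypothetical sub-register table: SubRegs[R] lists R's sub-registers.
static const std::vector<std::vector<int> > SubRegs = {
  {},      // reg 0 has no sub-registers
  {0},     // reg 1 contains reg 0
  {0, 1},  // reg 2 contains regs 0 and 1
};

// Models MCSubRegIterator with IncludeSelf=true: visit R, then its subregs.
static void insertWithSubRegs(int R, std::set<int> &Live) {
  Live.insert(R);  // the IncludeSelf part replaces the separate insert(Reg)
  for (std::size_t i = 0; i < SubRegs[R].size(); ++i)
    Live.insert(SubRegs[R][i]);
}

int main() {
  std::set<int> Live;
  insertWithSubRegs(2, Live);
  std::printf("%zu regs live\n", Live.size()); // 3: reg 2 plus both subregs
  return 0;
}

Folding the self-insert into the iterator removes one line at every call site and keeps the register and its sub-registers from drifting out of sync.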
@@ -764,7 +775,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
   if (MI->getNumOperands() < MCID.getNumOperands()) {
     report("Too few operands", MI);
     *OS << MCID.getNumOperands() << " operands expected, but "
-        << MI->getNumExplicitOperands() << " given.\n";
+        << MI->getNumOperands() << " given.\n";
   }
 
   // Check the tied operands.
@@ -822,7 +833,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
   if (MO->isReg() &&
       !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
     if (MO->isDef() && !MCOI.isOptionalDef())
-        report("Explicit operand marked as def", MO, MONum);
+      report("Explicit operand marked as def", MO, MONum);
     if (MO->isImplicit())
       report("Explicit operand marked as implicit", MO, MONum);
   }
@@ -997,16 +1008,16 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
     // Check the cached regunit intervals.
     if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
       for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
-        if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) {
-          LiveRangeQuery LRQ(*LI, UseIdx);
+        if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units)) {
+          LiveQueryResult LRQ = LR->Query(UseIdx);
           if (!LRQ.valueIn()) {
-            report("No live range at use", MO, MONum);
+            report("No live segment at use", MO, MONum);
             *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
-                << ' ' << *LI << '\n';
+                << ' ' << *LR << '\n';
           }
           if (MO->isKill() && !LRQ.isKill()) {
             report("Live range continues after kill flag", MO, MONum);
-            *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n';
+            *OS << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n';
           }
         }
       }
@@ -1016,9 +1027,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
     if (LiveInts->hasInterval(Reg)) {
       // This is a virtual register interval.
       const LiveInterval &LI = LiveInts->getInterval(Reg);
-      LiveRangeQuery LRQ(LI, UseIdx);
+      LiveQueryResult LRQ = LI.Query(UseIdx);
       if (!LRQ.valueIn()) {
-        report("No live range at use", MO, MONum);
+        report("No live segment at use", MO, MONum);
         *OS << UseIdx << " is not live in " << LI << '\n';
       }
       // Check for extra kill flags.
@@ -1067,7 +1078,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
         llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
       report("Multiple virtual register defs in SSA form", MO, MONum);
 
-    // Check LiveInts for a live range, but only for virtual registers.
+    // Check LiveInts for a live segment, but only for virtual registers.
     if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
         !LiveInts->isNotInMIMap(MI)) {
       SlotIndex DefIdx = LiveInts->getInstructionIndex(MI);
@@ -1082,9 +1093,17 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
               << DefIdx << " in " << LI << '\n';
         }
       } else {
-        report("No live range at def", MO, MONum);
+        report("No live segment at def", MO, MONum);
         *OS << DefIdx << " is not live in " << LI << '\n';
       }
+      // Check that, if the dead def flag is present, LiveInts agree.
+      if (MO->isDead()) {
+        LiveQueryResult LRQ = LI.Query(DefIdx);
+        if (!LRQ.isDeadDef()) {
+          report("Live range continues after dead def flag", MO, MONum);
+          *OS << "Live range: " << LI << '\n';
+        }
+      }
     } else {
       report("Virtual register has no Live interval", MO, MONum);
     }
@@ -1331,25 +1350,26 @@ void MachineVerifier::verifyLiveIntervals() {
 
   // Verify all the cached regunit intervals.
   for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
-    if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i))
-      verifyLiveInterval(*LI);
+    if (const LiveRange *LR = LiveInts->getCachedRegUnit(i))
+      verifyLiveRange(*LR, i);
 }
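The checkLiveness hunks above move from the old LiveRangeQuery class to LiveRange::Query, checking valueIn() at uses and the new isDeadDef() at defs. A rough standalone model of those two queries over half-open [start, end) segments; the segment layout and the single-slot encoding of a dead def are invented simplifications:

#include <cstdio>
#include <vector>

struct Segment { unsigned Start, End; };  // half-open [Start, End)

// Returns true if some segment covers Idx, i.e. a value is live-in there.
static bool valueIn(const std::vector<Segment> &LR, unsigned Idx) {
  for (std::size_t i = 0; i < LR.size(); ++i)
    if (LR[i].Start <= Idx && Idx < LR[i].End)
      return true;
  return false;
}

// A dead def occupies a single-slot segment: defined and never read.
static bool isDeadDef(const std::vector<Segment> &LR, unsigned DefIdx) {
  for (std::size_t i = 0; i < LR.size(); ++i)
    if (LR[i].Start == DefIdx)
      return LR[i].End == DefIdx + 1;  // toy stand-in for the dead slot
  return false;
}

int main() {
  std::vector<Segment> LR = {{4, 5}, {10, 20}};
  std::printf("%d %d %d\n",
              valueIn(LR, 12),   // 1: live at a use
              valueIn(LR, 8),    // 0: would report "No live segment at use"
              isDeadDef(LR, 4)); // 1: a dead def flag at 4 would be accepted
  return 0;
}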
-void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
-                                              VNInfo *VNI) {
+void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
+                                           const VNInfo *VNI,
+                                           unsigned Reg) {
   if (VNI->isUnused())
     return;
 
-  const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+  const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def);
 
   if (!DefVNI) {
-    report("Valno not live at def and not marked unused", MF, LI);
+    report("Valno not live at def and not marked unused", MF, LR);
     *OS << "Valno #" << VNI->id << '\n';
     return;
   }
 
   if (DefVNI != VNI) {
-    report("Live range at def has different valno", MF, LI);
+    report("Live segment at def has different valno", MF, LR);
     *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
         << " where valno #" << DefVNI->id << " is live\n";
     return;
@@ -1357,15 +1377,15 @@ void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
 
   const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
   if (!MBB) {
-    report("Invalid definition index", MF, LI);
+    report("Invalid definition index", MF, LR);
     *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
-        << " in " << LI << '\n';
+        << " in " << LR << '\n';
     return;
   }
 
   if (VNI->isPHIDef()) {
     if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
-      report("PHIDef value is not defined at MBB start", MBB, LI);
+      report("PHIDef value is not defined at MBB start", MBB, LR);
       *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
           << ", not at the beginning of BB#" << MBB->getNumber() << '\n';
     }
@@ -1375,161 +1395,154 @@ void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
 
   // Non-PHI def.
   const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
   if (!MI) {
-    report("No instruction at def index", MBB, LI);
+    report("No instruction at def index", MBB, LR);
     *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
     return;
   }
 
-  bool hasDef = false;
-  bool isEarlyClobber = false;
-  for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
-    if (!MOI->isReg() || !MOI->isDef())
-      continue;
-    if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
-      if (MOI->getReg() != LI.reg)
-        continue;
-    } else {
-      if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
-          !TRI->hasRegUnit(MOI->getReg(), LI.reg))
+  if (Reg != 0) {
+    bool hasDef = false;
+    bool isEarlyClobber = false;
+    for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+      if (!MOI->isReg() || !MOI->isDef())
         continue;
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        if (MOI->getReg() != Reg)
+          continue;
+      } else {
+        if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+            !TRI->hasRegUnit(MOI->getReg(), Reg))
+          continue;
+      }
+      hasDef = true;
+      if (MOI->isEarlyClobber())
+        isEarlyClobber = true;
     }
-    hasDef = true;
-    if (MOI->isEarlyClobber())
-      isEarlyClobber = true;
-  }
 
-  if (!hasDef) {
-    report("Defining instruction does not modify register", MI);
-    *OS << "Valno #" << VNI->id << " in " << LI << '\n';
-  }
+    if (!hasDef) {
+      report("Defining instruction does not modify register", MI);
+      *OS << "Valno #" << VNI->id << " in " << LR << '\n';
+    }
 
-  // Early clobber defs begin at USE slots, but other defs must begin at
-  // DEF slots.
-  if (isEarlyClobber) {
-    if (!VNI->def.isEarlyClobber()) {
-      report("Early clobber def must be at an early-clobber slot", MBB, LI);
+    // Early clobber defs begin at USE slots, but other defs must begin at
+    // DEF slots.
+    if (isEarlyClobber) {
+      if (!VNI->def.isEarlyClobber()) {
+        report("Early clobber def must be at an early-clobber slot", MBB, LR);
+        *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+      }
+    } else if (!VNI->def.isRegister()) {
+      report("Non-PHI, non-early clobber def must be at a register slot",
+             MBB, LR);
       *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
     }
-  } else if (!VNI->def.isRegister()) {
-    report("Non-PHI, non-early clobber def must be at a register slot",
-           MBB, LI);
-    *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
-  }
 }
 
-void
-MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
-                                           LiveInterval::const_iterator I) {
-  const VNInfo *VNI = I->valno;
-  assert(VNI && "Live range has no valno");
-
-  if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
-    report("Foreign valno in live range", MF, LI);
-    *OS << *I << " has a bad valno\n";
+void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
+                                             const LiveRange::const_iterator I,
+                                             unsigned Reg) {
+  const LiveRange::Segment &S = *I;
+  const VNInfo *VNI = S.valno;
+  assert(VNI && "Live segment has no valno");
+
+  if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) {
+    report("Foreign valno in live segment", MF, LR);
+    *OS << S << " has a bad valno\n";
   }
 
   if (VNI->isUnused()) {
-    report("Live range valno is marked unused", MF, LI);
-    *OS << *I << '\n';
+    report("Live segment valno is marked unused", MF, LR);
+    *OS << S << '\n';
   }
 
-  const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+  const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start);
   if (!MBB) {
-    report("Bad start of live segment, no basic block", MF, LI);
-    *OS << *I << '\n';
+    report("Bad start of live segment, no basic block", MF, LR);
+    *OS << S << '\n';
     return;
   }
   SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
-  if (I->start != MBBStartIdx && I->start != VNI->def) {
-    report("Live segment must begin at MBB entry or valno def", MBB, LI);
-    *OS << *I << '\n';
+  if (S.start != MBBStartIdx && S.start != VNI->def) {
+    report("Live segment must begin at MBB entry or valno def", MBB, LR);
+    *OS << S << '\n';
   }
 
   const MachineBasicBlock *EndMBB =
-    LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+    LiveInts->getMBBFromIndex(S.end.getPrevSlot());
   if (!EndMBB) {
-    report("Bad end of live segment, no basic block", MF, LI);
-    *OS << *I << '\n';
+    report("Bad end of live segment, no basic block", MF, LR);
+    *OS << S << '\n';
     return;
   }
 
   // No more checks for live-out segments.
-  if (I->end == LiveInts->getMBBEndIdx(EndMBB))
+  if (S.end == LiveInts->getMBBEndIdx(EndMBB))
     return;
 
   // RegUnit intervals are allowed dead phis.
-  if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() &&
-      I->start == VNI->def && I->end == VNI->def.getDeadSlot())
+  if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() &&
+      S.start == VNI->def && S.end == VNI->def.getDeadSlot())
    return;
 
   // The live segment is ending inside EndMBB
   const MachineInstr *MI =
-    LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+    LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
   if (!MI) {
-    report("Live segment doesn't end at a valid instruction", EndMBB, LI);
-    *OS << *I << '\n';
+    report("Live segment doesn't end at a valid instruction", EndMBB, LR);
+    *OS << S << '\n';
     return;
   }
 
   // The block slot must refer to a basic block boundary.
-  if (I->end.isBlock()) {
-    report("Live segment ends at B slot of an instruction", EndMBB, LI);
-    *OS << *I << '\n';
+  if (S.end.isBlock()) {
+    report("Live segment ends at B slot of an instruction", EndMBB, LR);
+    *OS << S << '\n';
   }
 
-  if (I->end.isDead()) {
+  if (S.end.isDead()) {
     // Segment ends on the dead slot.
     // That means there must be a dead def.
-    if (!SlotIndex::isSameInstr(I->start, I->end)) {
-      report("Live segment ending at dead slot spans instructions", EndMBB, LI);
-      *OS << *I << '\n';
+    if (!SlotIndex::isSameInstr(S.start, S.end)) {
+      report("Live segment ending at dead slot spans instructions", EndMBB, LR);
+      *OS << S << '\n';
     }
   }
 
   // A live segment can only end at an early-clobber slot if it is being
   // redefined by an early-clobber def.
-  if (I->end.isEarlyClobber()) {
-    if (I+1 == LI.end() || (I+1)->start != I->end) {
+  if (S.end.isEarlyClobber()) {
+    if (I+1 == LR.end() || (I+1)->start != S.end) {
       report("Live segment ending at early clobber slot must be "
-             "redefined by an EC def in the same instruction", EndMBB, LI);
-      *OS << *I << '\n';
+             "redefined by an EC def in the same instruction", EndMBB, LR);
+      *OS << S << '\n';
     }
   }
 
   // The following checks only apply to virtual registers. Physreg liveness
   // is too weird to check.
-  if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
-    // A live range can end with either a redefinition, a kill flag on a
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    // A live segment can end with either a redefinition, a kill flag on a
     // use, or a dead flag on a def.
     bool hasRead = false;
-    bool hasDeadDef = false;
     for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
-      if (!MOI->isReg() || MOI->getReg() != LI.reg)
+      if (!MOI->isReg() || MOI->getReg() != Reg)
         continue;
       if (MOI->readsReg())
         hasRead = true;
-      if (MOI->isDef() && MOI->isDead())
-        hasDeadDef = true;
     }
-
-    if (I->end.isDead()) {
-      if (!hasDeadDef) {
-        report("Instruction doesn't have a dead def operand", MI);
-        I->print(*OS);
-        *OS << " in " << LI << '\n';
-      }
-    } else {
+    if (!S.end.isDead()) {
       if (!hasRead) {
-        report("Instruction ending live range doesn't read the register", MI);
-        *OS << *I << " in " << LI << '\n';
+        report("Instruction ending live segment doesn't read the register", MI);
+        *OS << S << " in " << LR << '\n';
       }
     }
   }
 
   // Now check all the basic blocks in this live segment.
   MachineFunction::const_iterator MFI = MBB;
-  // Is this live range the beginning of a non-PHIDef VN?
-  if (I->start == VNI->def && !VNI->isPHIDef()) {
+  // Is this live segment the beginning of a non-PHIDef VN?
+  if (S.start == VNI->def && !VNI->isPHIDef()) {
     // Not live-in to any blocks.
     if (MBB == EndMBB)
       return;
@@ -1537,9 +1550,9 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
     ++MFI;
   }
   for (;;) {
-    assert(LiveInts->isLiveInToMBB(LI, MFI));
+    assert(LiveInts->isLiveInToMBB(LR, MFI));
     // We don't know how to track physregs into a landing pad.
-    if (!TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+    if (!TargetRegisterInfo::isVirtualRegister(Reg) &&
         MFI->isLandingPad()) {
       if (&*MFI == EndMBB)
         break;
@@ -1555,11 +1568,11 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
     for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
          PE = MFI->pred_end(); PI != PE; ++PI) {
       SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
-      const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
+      const VNInfo *PVNI = LR.getVNInfoBefore(PEnd);
 
       // All predecessors must have a live-out value.
       if (!PVNI) {
-        report("Register not marked live out of predecessor", *PI, LI);
+        report("Register not marked live out of predecessor", *PI, LR);
         *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
             << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
             << PEnd << '\n';
@@ -1568,7 +1581,7 @@
       // Only PHI-defs can take different predecessor values.
       if (!IsPHI && PVNI != VNI) {
-        report("Different value live out of predecessor", *PI, LI);
+        report("Different value live out of predecessor", *PI, LR);
         *OS << "Valno #" << PVNI->id << " live out of BB#"
             << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id
             << " live into BB#" << MFI->getNumber()
@@ -1581,13 +1594,17 @@
     }
   }
 }
 
-void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
-  for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
-       I!=E; ++I)
-    verifyLiveIntervalValue(LI, *I);
+void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg) {
+  for (LiveRange::const_vni_iterator I = LR.vni_begin(), E = LR.vni_end();
+       I != E; ++I)
+    verifyLiveRangeValue(LR, *I, Reg);
+
+  for (LiveRange::const_iterator I = LR.begin(), E = LR.end(); I != E; ++I)
+    verifyLiveRangeSegment(LR, I, Reg);
+}
 
-  for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I)
-    verifyLiveIntervalSegment(LI, I);
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
+  verifyLiveRange(LI, LI.reg);
 
   // Check the LI only has one connected component.
   if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
@@ -1606,3 +1623,130 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
     }
   }
 }
+
+namespace {
+  // FrameSetup and FrameDestroy can have zero adjustment, so using a single
+  // integer, we can't tell whether it is a FrameSetup or FrameDestroy if the
+  // value is zero.
+  // We use a bool plus an integer to capture the stack state.
+  struct StackStateOfBB {
+    StackStateOfBB() : EntryValue(0), ExitValue(0), EntryIsSetup(false),
+      ExitIsSetup(false) { }
+    StackStateOfBB(int EntryVal, int ExitVal, bool EntrySetup, bool ExitSetup) :
+      EntryValue(EntryVal), ExitValue(ExitVal), EntryIsSetup(EntrySetup),
+      ExitIsSetup(ExitSetup) { }
+    // Can be negative, which means we are setting up a frame.
+    int EntryValue;
+    int ExitValue;
+    bool EntryIsSetup;
+    bool ExitIsSetup;
+  };
+}
+
+/// Make sure on every path through the CFG, a FrameSetup <n> is always followed
+/// by a FrameDestroy <n>, stack adjustments are identical on all
+/// CFG edges to a merge point, and frame is destroyed at end of a return block.
+void MachineVerifier::verifyStackFrame() {
+  int FrameSetupOpcode = TII->getCallFrameSetupOpcode();
+  int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
+
+  SmallVector<StackStateOfBB, 8> SPState;
+  SPState.resize(MF->getNumBlockIDs());
+  SmallPtrSet<const MachineBasicBlock*, 8> Reachable;
+
+  // Visit the MBBs in DFS order.
+  for (df_ext_iterator<const MachineFunction*,
+                       SmallPtrSet<const MachineBasicBlock*, 8> >
+       DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable);
+       DFI != DFE; ++DFI) {
+    const MachineBasicBlock *MBB = *DFI;
+
+    StackStateOfBB BBState;
+    // Check the exit state of the DFS stack predecessor.
+    if (DFI.getPathLength() >= 2) {
+      const MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
+      assert(Reachable.count(StackPred) &&
+             "DFS stack predecessor is already visited.\n");
+      BBState.EntryValue = SPState[StackPred->getNumber()].ExitValue;
+      BBState.EntryIsSetup = SPState[StackPred->getNumber()].ExitIsSetup;
+      BBState.ExitValue = BBState.EntryValue;
+      BBState.ExitIsSetup = BBState.EntryIsSetup;
+    }
+
+    // Update stack state by checking contents of MBB.
+    for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      if (I->getOpcode() == FrameSetupOpcode) {
+        // The first operand of a FrameOpcode should be i32.
+        int Size = I->getOperand(0).getImm();
+        assert(Size >= 0 &&
+          "Value should be non-negative in FrameSetup and FrameDestroy.\n");
+
+        if (BBState.ExitIsSetup)
+          report("FrameSetup is after another FrameSetup", I);
+        BBState.ExitValue -= Size;
+        BBState.ExitIsSetup = true;
+      }
+
+      if (I->getOpcode() == FrameDestroyOpcode) {
+        // The first operand of a FrameOpcode should be i32.
+        int Size = I->getOperand(0).getImm();
+        assert(Size >= 0 &&
+          "Value should be non-negative in FrameSetup and FrameDestroy.\n");
+
+        if (!BBState.ExitIsSetup)
+          report("FrameDestroy is not after a FrameSetup", I);
+        int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue :
+                                               BBState.ExitValue;
+        if (BBState.ExitIsSetup && AbsSPAdj != Size) {
+          report("FrameDestroy <n> is after FrameSetup <m>", I);
+          *OS << "FrameDestroy <" << Size << "> is after FrameSetup <"
+              << AbsSPAdj << ">.\n";
+        }
+        BBState.ExitValue += Size;
+        BBState.ExitIsSetup = false;
+      }
+    }
+    SPState[MBB->getNumber()] = BBState;
+
+    // Make sure the exit state of any predecessor is consistent with the entry
+    // state.
+    for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+         E = MBB->pred_end(); I != E; ++I) {
+      if (Reachable.count(*I) &&
+          (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue ||
+           SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) {
+        report("The exit stack state of a predecessor is inconsistent.", MBB);
+        *OS << "Predecessor BB#" << (*I)->getNumber() << " has exit state ("
+            << SPState[(*I)->getNumber()].ExitValue << ", "
+            << SPState[(*I)->getNumber()].ExitIsSetup
+            << "), while BB#" << MBB->getNumber() << " has entry state ("
+            << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n";
+      }
+    }
+
+    // Make sure the entry state of any successor is consistent with the exit
+    // state.
+    for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+         E = MBB->succ_end(); I != E; ++I) {
+      if (Reachable.count(*I) &&
+          (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue ||
+           SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) {
+        report("The entry stack state of a successor is inconsistent.", MBB);
+        *OS << "Successor BB#" << (*I)->getNumber() << " has entry state ("
+            << SPState[(*I)->getNumber()].EntryValue << ", "
+            << SPState[(*I)->getNumber()].EntryIsSetup
+            << "), while BB#" << MBB->getNumber() << " has exit state ("
+            << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n";
+      }
+    }
+
+    // Make sure a basic block with return ends with zero stack adjustment.
+    if (!MBB->empty() && MBB->back().isReturn()) {
+      if (BBState.ExitIsSetup)
+        report("A return block ends with a FrameSetup.", MBB);
+      if (BBState.ExitValue)
+        report("A return block ends with a nonzero stack adjustment.", MBB);
+    }
+  }
+}
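verifyStackFrame, added above, propagates an (adjustment, in-setup) pair along a DFS of the CFG and reports mismatches at merge points and returns. A compact standalone re-creation of that bookkeeping on a hand-written linear CFG; the opcodes and block graph are invented and the real pass visits blocks with df_ext_iterator instead:

#include <cstdio>
#include <utility>
#include <vector>

enum Op { Setup, Destroy, Other };
struct Block { std::vector<std::pair<Op, int> > Insns; };

int main() {
  // BB0 -> BB1 -> BB2(return); FrameSetup<8> in BB0, FrameDestroy<8> in BB2.
  std::vector<Block> CFG(3);
  CFG[0].Insns.push_back(std::make_pair(Setup, 8));
  CFG[1].Insns.push_back(std::make_pair(Other, 0));
  CFG[2].Insns.push_back(std::make_pair(Destroy, 8));

  std::vector<int> ExitVal(CFG.size(), 0);
  std::vector<bool> ExitSetup(CFG.size(), false);
  for (std::size_t B = 0; B < CFG.size(); ++B) {
    int Val = B ? ExitVal[B - 1] : 0;            // inherit predecessor state
    bool InSetup = B ? ExitSetup[B - 1] : false;
    for (std::size_t i = 0; i < CFG[B].Insns.size(); ++i) {
      Op O = CFG[B].Insns[i].first;
      int Size = CFG[B].Insns[i].second;
      if (O == Setup) {
        if (InSetup) std::printf("BB%zu: FrameSetup after FrameSetup\n", B);
        Val -= Size; InSetup = true;             // SP moves down into the frame
      } else if (O == Destroy) {
        if (!InSetup) std::printf("BB%zu: FrameDestroy without Setup\n", B);
        if (-Val != Size)
          std::printf("BB%zu: Destroy<%d> after Setup<%d>\n", B, Size, -Val);
        Val += Size; InSetup = false;
      }
    }
    ExitVal[B] = Val; ExitSetup[B] = InSetup;
  }
  // A return block must end with the frame fully destroyed.
  if (ExitSetup.back() || ExitVal.back()) std::printf("bad return state\n");
  else std::printf("frame balanced at return\n");
  return 0;
}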
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index 5584708..dcd9072 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -66,7 +66,7 @@ namespace {
     ///
     bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
     void LowerPHINode(MachineBasicBlock &MBB,
-                      MachineBasicBlock::iterator AfterPHIsIt);
+                      MachineBasicBlock::iterator LastPHIIt);
 
     /// analyzePHINodes - Gather information about the PHI nodes in
     /// here. In particular, we want to map the number of uses of a virtual
@@ -185,10 +185,11 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
 
   // Get an iterator to the first instruction after the last PHI node (this may
   // also be the end of the basic block).
-  MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
+  MachineBasicBlock::iterator LastPHIIt =
+    prior(MBB.SkipPHIsAndLabels(MBB.begin()));
 
   while (MBB.front().isPHI())
-    LowerPHINode(MBB, AfterPHIsIt);
+    LowerPHINode(MBB, LastPHIIt);
 
   return true;
 }
@@ -218,8 +219,11 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
 /// LowerPHINode - Lower the PHI node at the top of the specified block,
 ///
 void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator AfterPHIsIt) {
+                                  MachineBasicBlock::iterator LastPHIIt) {
   ++NumLowered;
+
+  MachineBasicBlock::iterator AfterPHIsIt = llvm::next(LastPHIIt);
+
   // Unlink the PHI node from the basic block, but don't delete the PHI yet.
   MachineInstr *MPhi = MBB.remove(MBB.begin());
 
@@ -309,14 +313,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
     if (IncomingReg) {
       // Add the region from the beginning of MBB to the copy instruction to
       // IncomingReg's live interval.
-      LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg);
+      LiveInterval &IncomingLI = LIS->createEmptyInterval(IncomingReg);
       VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
       if (!IncomingVNI)
         IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
                                               LIS->getVNInfoAllocator());
-      IncomingLI.addRange(LiveRange(MBBStartIndex,
-                                    DestCopyIndex.getRegSlot(),
-                                    IncomingVNI));
+      IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex,
+                                                  DestCopyIndex.getRegSlot(),
+                                                  IncomingVNI));
     }
 
     LiveInterval &DestLI = LIS->getInterval(DestReg);
@@ -328,14 +332,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
       // the copy instruction.
       VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex);
       assert(OrigDestVNI && "PHI destination should be live at block entry.");
-      DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot());
+      DestLI.removeSegment(MBBStartIndex, MBBStartIndex.getDeadSlot());
       DestLI.createDeadDef(DestCopyIndex.getRegSlot(),
                            LIS->getVNInfoAllocator());
       DestLI.removeValNo(OrigDestVNI);
     } else {
       // Otherwise, remove the region from the beginning of MBB to the copy
       // instruction from DestReg's live interval.
-      DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot());
+      DestLI.removeSegment(MBBStartIndex, DestCopyIndex.getRegSlot());
       VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
       assert(DestVNI && "PHI destination should be live at its definition.");
       DestVNI->def = DestCopyIndex.getRegSlot();
@@ -456,7 +460,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
       if (LIS) {
         if (NewSrcInstr) {
          LIS->InsertMachineInstrInMaps(NewSrcInstr);
-          LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr);
+          LIS->addSegmentToEndOfBlock(IncomingReg, NewSrcInstr);
         }
 
         if (!SrcUndef &&
@@ -507,8 +511,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
                  "Cannot find kill instruction");
 
           SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst);
-          SrcLI.removeRange(LastUseIndex.getRegSlot(),
-                            LIS->getMBBEndIdx(&opBlock));
+          SrcLI.removeSegment(LastUseIndex.getRegSlot(),
+                              LIS->getMBBEndIdx(&opBlock));
         }
       }
     }
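The LowerPHINode hunks above rename addRange/removeRange to addSegment/removeSegment but keep the underlying operation: extending the incoming copy's live range from block entry to the copy's slot. A toy version of inserting such a segment into a sorted, coalescing list; slot numbers are arbitrary and this is only a rough model of what LiveRange::addSegment guarantees for a single value number:

#include <cstdio>
#include <vector>

struct Segment { unsigned Start, End; };  // half-open [Start, End)

// Insert keeping the list sorted and merging abutting/overlapping segments.
static void addSegment(std::vector<Segment> &LR, Segment S) {
  std::vector<Segment> Out;
  for (std::size_t i = 0; i < LR.size(); ++i) {
    const Segment &T = LR[i];
    if (T.End < S.Start || S.End < T.Start)
      Out.push_back(T);             // disjoint: keep as-is
    else {                          // overlap or abut: fold into S
      if (T.Start < S.Start) S.Start = T.Start;
      if (T.End > S.End) S.End = T.End;
    }
  }
  std::size_t i = 0;
  while (i < Out.size() && Out[i].Start < S.Start) ++i;
  Out.insert(Out.begin() + i, S);   // insert S in sorted position
  LR = Out;
}

int main() {
  std::vector<Segment> LR = {{16, 24}};
  addSegment(LR, {0, 16});          // MBB start .. copy slot, abuts {16,24}
  std::printf("[%u,%u)\n", LR[0].Start, LR[0].End);  // [0,24): one segment
  return LR.size() == 1 ? 0 : 1;
}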
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
index 9ac47fb4..48234ae 100644
--- a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
@@ -1,4 +1,4 @@
-//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
index bfbc062..f4ffd03 100644
--- a/contrib/llvm/lib/CodeGen/Passes.cpp
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -58,8 +58,6 @@ OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
 static cl::opt<cl::boolOrDefault>
 EnableMachineSched("enable-misched", cl::Hidden,
                    cl::desc("Enable the machine instruction scheduling pass."));
-static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden,
-    cl::desc("Use strong PHI elimination."));
 static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
     cl::Hidden,
     cl::desc("Disable Machine LICM"));
@@ -236,7 +234,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
 
   // Temporarily disable experimental passes.
   const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
-  if (!ST.enableMachineScheduler())
+  if (!ST.useMachineScheduler())
     disablePass(&MachineSchedulerID);
 }
 
@@ -300,6 +298,8 @@ void TargetPassConfig::addPass(Pass *P) {
 
   if (Started && !Stopped)
     PM->add(P);
+  else
+    delete P;
   if (StopAfter == PassID)
     Stopped = true;
   if (StartAfter == PassID)
@@ -331,7 +331,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
   addPass(P); // Ends the lifetime of P.
 
   // Add the passes after the pass P if there is any.
-  for (SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4>::iterator
+  for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator
          I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
       I != E; ++I) {
     if ((*I).first == PassID) {
@@ -396,7 +396,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
    // removed from the parent invoke(s). This could happen when a landing
    // pad is shared by multiple invokes and is also a target of a normal
    // edge from elsewhere.
-    addPass(createSjLjEHPreparePass(TM->getTargetLowering()));
+    addPass(createSjLjEHPreparePass(TM));
     // FALLTHROUGH
   case ExceptionHandling::DwarfCFI:
   case ExceptionHandling::ARM:
@@ -404,7 +404,7 @@
     addPass(createDwarfEHPass(TM));
     break;
   case ExceptionHandling::None:
-    addPass(createLowerInvokePass(TM->getTargetLowering()));
+    addPass(createLowerInvokePass(TM));
 
     // The lower invoke pass may create unreachable code. Remove it.
     addPass(createUnreachableBlockEliminationPass());
@@ -416,13 +416,13 @@
 /// before exception handling preparation passes.
 void TargetPassConfig::addCodeGenPrepare() {
   if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
-    addPass(createCodeGenPreparePass(getTargetLowering()));
+    addPass(createCodeGenPreparePass(TM));
 }
 
 /// Add common passes that perform LLVM IR to IR transforms in preparation for
 /// instruction selection.
 void TargetPassConfig::addISelPrepare() {
-  addPass(createStackProtectorPass(getTargetLowering()));
+  addPass(createStackProtectorPass(TM));
 
   addPreISel();
 
@@ -673,24 +673,15 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
   // preferably fix the scavenger to not depend on them).
   addPass(&LiveVariablesID);
 
-  // Add passes that move from transformed SSA into conventional SSA. This is a
-  // "copy coalescing" problem.
-  //
-  if (!EnableStrongPHIElim) {
-    // Edge splitting is smarter with machine loop info.
-    addPass(&MachineLoopInfoID);
-    addPass(&PHIEliminationID);
-  }
+  // Edge splitting is smarter with machine loop info.
+  addPass(&MachineLoopInfoID);
+  addPass(&PHIEliminationID);
 
   // Eventually, we want to run LiveIntervals before PHI elimination.
   if (EarlyLiveIntervals)
     addPass(&LiveIntervalsID);
 
   addPass(&TwoAddressInstructionPassID);
-
-  if (EnableStrongPHIElim)
-    addPass(&StrongPHIEliminationID);
-
   addPass(&RegisterCoalescerID);
 
   // PreRA instruction scheduling.
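The addPass hunk above plugs a leak: when a pass falls outside the started/stopped window it was previously neither handed to the manager nor freed. A tiny standalone model of that ownership rule, not part of the commit; the class names are placeholders:

#include <memory>
#include <vector>

struct Pass { virtual ~Pass() {} };

struct PassManagerModel {
  std::vector<std::unique_ptr<Pass> > Owned;
  void add(Pass *P) { Owned.emplace_back(P); }  // takes ownership
};

// Mirrors the fixed TargetPassConfig::addPass: the manager owns accepted
// passes, and declined passes must be deleted by this logic instead.
static void addPass(PassManagerModel &PM, Pass *P, bool Started, bool Stopped) {
  if (Started && !Stopped)
    PM.add(P);
  else
    delete P;  // the fix: without this, a skipped pass leaked
}

int main() {
  PassManagerModel PM;
  addPass(PM, new Pass, /*Started=*/true, /*Stopped=*/false);   // kept
  addPass(PM, new Pass, /*Started=*/false, /*Stopped=*/false);  // freed
  return PM.Owned.size() == 1 ? 0 : 1;
}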
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index a7439b5..28f2d2f 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -40,20 +40,30 @@
 // If the branch instruction can use flag from "sub", then we can replace
 // "sub" with "subs" and eliminate the "cmp" instruction.
 //
-// - Optimize Bitcast pairs:
-//
-//     v1 = bitcast v0
-//     v2 = bitcast v1
-//        = v2
-//   =>
-//     v1 = bitcast v0
-//        = v0
-//
 // - Optimize Loads:
 //
 //     Loads that can be folded into a later instruction. A load is foldable
 //     if it loads to virtual registers and the virtual register defined has
 //     a single use.
+//
+// - Optimize Copies and Bitcast:
+//
+//     Rewrite copies and bitcasts to avoid cross register bank copies
+//     when possible.
+//     E.g., consider the following example, where capital and lower
+//     letters denote different register files:
+//     b = copy A    <-- cross-bank copy
+//     C = copy b    <-- cross-bank copy
+//   =>
+//     b = copy A    <-- cross-bank copy
+//     C = copy A    <-- same-bank copy
+//
+//     E.g., for bitcast:
+//     b = bitcast A <-- cross-bank copy
+//     C = bitcast b <-- cross-bank copy
+//   =>
+//     b = bitcast A <-- cross-bank copy
+//     C = copy A    <-- same-bank copy
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "peephole-opt"
@@ -81,11 +91,11 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
                 cl::desc("Disable the peephole optimizer"));
 
 STATISTIC(NumReuse,      "Number of extension results reused");
-STATISTIC(NumBitcasts,   "Number of bitcasts eliminated");
 STATISTIC(NumCmps,       "Number of compares eliminated");
 STATISTIC(NumImmFold,    "Number of move immediate folded");
 STATISTIC(NumLoadFold,   "Number of loads folded");
 STATISTIC(NumSelects,    "Number of selects optimized");
+STATISTIC(NumCopiesBitcasts, "Number of copies/bitcasts optimized");
 
 namespace {
   class PeepholeOptimizer : public MachineFunctionPass {
@@ -112,11 +122,11 @@ namespace {
     }
 
   private:
-    bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                           SmallPtrSet<MachineInstr*, 8> &LocalMIs);
     bool optimizeSelect(MachineInstr *MI);
+    bool optimizeCopyOrBitcast(MachineInstr *MI);
     bool isMoveImmediate(MachineInstr *MI,
                          SmallSet<unsigned, 4> &ImmDefRegs,
                          DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
@@ -298,78 +308,6 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
   return Changed;
 }
 
-/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that
-/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast
-/// a value cross register classes), and the source is defined by another
-/// bitcast instruction B. And if the register class of source of B matches
-/// the register class of instruction A, then it is legal to replace all uses
-/// of the def of A with source of B. e.g.
-///   %vreg0<def> = VMOVSR %vreg1
-///   %vreg3<def> = VMOVRS %vreg0
-///   Replace all uses of vreg3 with vreg1.
-
-bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI,
-                                             MachineBasicBlock *MBB) {
-  unsigned NumDefs = MI->getDesc().getNumDefs();
-  unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
-  if (NumDefs != 1)
-    return false;
-
-  unsigned Def = 0;
-  unsigned Src = 0;
-  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    if (!MO.isReg())
-      continue;
-    unsigned Reg = MO.getReg();
-    if (!Reg)
-      continue;
-    if (MO.isDef())
-      Def = Reg;
-    else if (Src)
-      // Multiple sources?
-      return false;
-    else
-      Src = Reg;
-  }
-
-  assert(Def && Src && "Malformed bitcast instruction!");
-
-  MachineInstr *DefMI = MRI->getVRegDef(Src);
-  if (!DefMI || !DefMI->isBitcast())
-    return false;
-
-  unsigned SrcSrc = 0;
-  NumDefs = DefMI->getDesc().getNumDefs();
-  NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
-  if (NumDefs != 1)
-    return false;
-  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
-    const MachineOperand &MO = DefMI->getOperand(i);
-    if (!MO.isReg() || MO.isDef())
-      continue;
-    unsigned Reg = MO.getReg();
-    if (!Reg)
-      continue;
-    if (!MO.isDef()) {
-      if (SrcSrc)
-        // Multiple sources?
-        return false;
-      else
-        SrcSrc = Reg;
-    }
-  }
-
-  if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
-    return false;
-
-  MRI->replaceRegWith(Def, SrcSrc);
-  MRI->clearKillFlags(SrcSrc);
-  MI->eraseFromParent();
-  ++NumBitcasts;
-  return true;
-}
-
 /// optimizeCmpInstr - If the instruction is a compare and the previous
 /// instruction it's comparing against already sets (or could be modified to
 /// set) the same flag as the compare, then we can remove the comparison and use
@@ -411,6 +349,150 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
   return true;
 }
 
+/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
+/// share the same register file.
+static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
+                                  const TargetRegisterClass *DefRC,
+                                  unsigned DefSubReg,
+                                  const TargetRegisterClass *SrcRC,
+                                  unsigned SrcSubReg) {
+  // Same register class.
+  if (DefRC == SrcRC)
+    return true;
+
+  // Both operands are sub registers. Check if they share a register class.
+  unsigned SrcIdx, DefIdx;
+  if (SrcSubReg && DefSubReg)
+    return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
+                                      SrcIdx, DefIdx) != NULL;
+  // At most one of the register is a sub register, make it Src to avoid
+  // duplicating the test.
+  if (!SrcSubReg) {
+    std::swap(DefSubReg, SrcSubReg);
+    std::swap(DefRC, SrcRC);
+  }
+
+  // One of the register is a sub register, check if we can get a superclass.
+  if (SrcSubReg)
+    return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != NULL;
+  // Plain copy.
+  return TRI.getCommonSubClass(DefRC, SrcRC) != NULL;
+}
+
+/// \brief Get the index of the definition and source for \p Copy
+/// instruction.
+/// \pre Copy.isCopy() or Copy.isBitcast().
+/// \return True if the Copy instruction has only one register source
+/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx
+/// are invalid.
+static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy,
+                                      unsigned &DefIdx, unsigned &SrcIdx) {
+  assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type.");
+  if (Copy.isCopy()) {
+    // Copy instruction are supposed to be: Def = Src.
+    if (Copy.getDesc().getNumOperands() != 2)
+      return false;
+    DefIdx = 0;
+    SrcIdx = 1;
+    assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!");
+    return true;
+  }
+  // Bitcast case.
+  // Bitcasts with more than one def are not supported.
+  if (Copy.getDesc().getNumDefs() != 1)
+    return false;
+  // Initialize SrcIdx to an undefined operand.
+  SrcIdx = Copy.getDesc().getNumOperands();
+  for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) {
+    const MachineOperand &MO = Copy.getOperand(OpIdx);
+    if (!MO.isReg() || !MO.getReg())
+      continue;
+    if (MO.isDef())
+      DefIdx = OpIdx;
+    else if (SrcIdx != EndOpIdx)
+      // Multiple sources?
+      return false;
+    SrcIdx = OpIdx;
+  }
+  return true;
+}
+
+/// \brief Optimize a copy or bitcast instruction to avoid cross
+/// register bank copy. The optimization looks through a chain of
+/// copies and tries to find a source that has a compatible register
+/// class.
+/// Two register classes are considered to be compatible if they share
+/// the same register bank.
+/// New copies issued by this optimization are register allocator
+/// friendly. This optimization does not remove any copy as it may
+/// overconstrain the register allocator, but replaces some when
+/// possible.
+/// \pre \p MI is a Copy (MI->isCopy() is true)
+/// \return True, when \p MI has been optimized. In that case, \p MI has
+/// been removed from its parent.
+bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
+  unsigned DefIdx, SrcIdx;
+  if (!MI || !getCopyOrBitcastDefUseIdx(*MI, DefIdx, SrcIdx))
+    return false;
+
+  const MachineOperand &MODef = MI->getOperand(DefIdx);
+  assert(MODef.isReg() && "Copies must be between registers.");
+  unsigned Def = MODef.getReg();
+
+  if (TargetRegisterInfo::isPhysicalRegister(Def))
+    return false;
+
+  const TargetRegisterClass *DefRC = MRI->getRegClass(Def);
+  unsigned DefSubReg = MODef.getSubReg();
+
+  unsigned Src;
+  unsigned SrcSubReg;
+  bool ShouldRewrite = false;
+  MachineInstr *Copy = MI;
+  const TargetRegisterInfo &TRI = *TM->getRegisterInfo();
+
+  // Follow the chain of copies until we reach the top or find a
+  // more suitable source.
+  do {
+    unsigned CopyDefIdx, CopySrcIdx;
+    if (!getCopyOrBitcastDefUseIdx(*Copy, CopyDefIdx, CopySrcIdx))
+      break;
+    const MachineOperand &MO = Copy->getOperand(CopySrcIdx);
+    assert(MO.isReg() && "Copies must be between registers.");
+    Src = MO.getReg();
+
+    if (TargetRegisterInfo::isPhysicalRegister(Src))
+      break;
+
+    const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
+    SrcSubReg = MO.getSubReg();
+
+    // If this source does not incur a cross register bank copy, use it.
+    ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC,
+                                          SrcSubReg);
+    // Follow the chain of copies: get the definition of Src.
+    Copy = MRI->getVRegDef(Src);
+  } while (!ShouldRewrite && Copy && (Copy->isCopy() || Copy->isBitcast()));
+
+  // If we did not find a more suitable source, there is nothing to optimize.
+  if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg())
+    return false;
+
+  // Rewrite the copy to avoid a cross register bank penalty.
+  unsigned NewVR = TargetRegisterInfo::isPhysicalRegister(Def) ? Def :
+    MRI->createVirtualRegister(DefRC);
+  MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                                  TII->get(TargetOpcode::COPY), NewVR)
+    .addReg(Src, 0, SrcSubReg);
+  NewCopy->getOperand(0).setSubReg(DefSubReg);
+
+  MRI->replaceRegWith(Def, NewVR);
+  MRI->clearKillFlags(NewVR);
+  MI->eraseFromParent();
+  ++NumCopiesBitcasts;
+  return true;
+}
+
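optimizeCopyOrBitcast, shown above, walks def chains of copies looking for a source whose register class shares a bank with the destination. A standalone caricature of that walk with two invented banks, showing why rewriting "C = copy b" into "C = copy A" removes a cross-bank hop; none of this is LLVM API:

#include <cstdio>
#include <map>
#include <string>

enum Bank { GPR, FPR };
struct VReg { Bank B; std::string DefSrc; };  // DefSrc: source of defining copy

int main() {
  // b = copy A (GPR->FPR), C = copy b (FPR->GPR), as in the comment above.
  std::map<std::string, VReg> Defs;
  Defs["A"] = VReg{GPR, ""};
  Defs["b"] = VReg{FPR, "A"};
  Defs["C"] = VReg{GPR, "b"};

  std::string Dst = "C";
  std::string Src = Defs[Dst].DefSrc;
  // Follow the copy chain until a source in the destination's bank appears.
  while (!Src.empty() && Defs[Src].B != Defs[Dst].B)
    Src = Defs[Src].DefSrc;
  if (!Src.empty() && Src != Defs[Dst].DefSrc)
    std::printf("rewrite: %s = copy %s  (same-bank)\n",
                Dst.c_str(), Src.c_str());
  return 0;
}

As the doc comment says, the pass never deletes the original copies; it only redirects the final one, leaving the allocator free to coalesce the rest.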
 /// isLoadFoldable - Check whether MI is a candidate for folding into a later
 /// instruction. We only fold loads to virtual registers and the virtual
 /// register defined has a single use.
@@ -523,7 +605,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
       if (MI->mayStore() || MI->isCall())
         FoldAsLoadDefReg = 0;
 
-      if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
+      if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI))) {
         // MI is deleted.
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 53fe273..1afc1ec 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -127,6 +127,12 @@ namespace {
     /// The schedule. Null SUnit*'s represent noop instructions.
     std::vector<SUnit*> Sequence;
 
+    /// The index in BB of RegionEnd.
+    ///
+    /// This is the instruction number from the top of the current block, not
+    /// the SlotIndex. It is only used by the AntiDepBreaker.
+    unsigned EndIndex;
+
   public:
     SchedulePostRATDList(
       MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
@@ -141,11 +147,14 @@ namespace {
     ///
     void startBlock(MachineBasicBlock *BB);
 
+    // Set the index of RegionEnd within the current BB.
+    void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }
+
     /// Initialize the scheduler state for the next scheduling region.
     virtual void enterRegion(MachineBasicBlock *bb,
                              MachineBasicBlock::iterator begin,
                              MachineBasicBlock::iterator end,
-                             unsigned endcount);
+                             unsigned regioninstrs);
 
     /// Notify that the scheduler has finished scheduling the current region.
     virtual void exitRegion();
@@ -197,7 +206,7 @@ SchedulePostRATDList::SchedulePostRATDList(
   TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
   SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
   : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA),
-    LiveRegs(TRI->getNumRegs())
+    LiveRegs(TRI->getNumRegs()), EndIndex(0)
 {
   const TargetMachine &TM = MF.getTarget();
   const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
@@ -223,8 +232,8 @@ SchedulePostRATDList::~SchedulePostRATDList() {
 void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
                                        MachineBasicBlock::iterator begin,
                                        MachineBasicBlock::iterator end,
-                                       unsigned endcount) {
-  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+                                       unsigned regioninstrs) {
+  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
   Sequence.clear();
 }
 
@@ -312,20 +321,21 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
     unsigned Count = MBB->size(), CurrentCount = Count;
     for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
       MachineInstr *MI = llvm::prior(I);
+      --Count;
       // Calls are not scheduling boundaries before register allocation, but
       // post-ra we don't gain anything by scheduling across calls since we
       // don't need to worry about register pressure.
       if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
-        Scheduler.enterRegion(MBB, I, Current, CurrentCount);
+        Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count);
+        Scheduler.setEndIndex(CurrentCount);
         Scheduler.schedule();
         Scheduler.exitRegion();
         Scheduler.EmitSchedule();
         Current = MI;
-        CurrentCount = Count - 1;
+        CurrentCount = Count;
         Scheduler.Observe(MI, CurrentCount);
       }
       I = MI;
-      --Count;
       if (MI->isBundle())
         Count -= MI->getBundleSize();
     }
@@ -333,6 +343,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
     assert((MBB->begin() == Current || CurrentCount != 0) &&
            "Instruction count mismatch!");
     Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
+    Scheduler.setEndIndex(CurrentCount);
     Scheduler.schedule();
     Scheduler.exitRegion();
     Scheduler.EmitSchedule();
@@ -424,9 +435,9 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
     for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
          E = (*SI)->livein_end(); I != E; ++I) {
       unsigned Reg = *I;
-      LiveRegs.set(Reg);
-      // Repeat, for all subregs.
-      for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+      // Repeat, for reg and all subregs.
+      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+           SubRegs.isValid(); ++SubRegs)
         LiveRegs.set(*SubRegs);
     }
   }
@@ -496,20 +507,19 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
         // Ignore two-addr defs.
         if (MI->isRegTiedToUseOperand(i)) continue;
 
-        LiveRegs.reset(Reg);
-
-        // Repeat for all subregs.
-        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+        // Repeat for reg and all subregs.
+        for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+             SubRegs.isValid(); ++SubRegs)
          LiveRegs.reset(*SubRegs);
       }
 
       // Examine all used registers and set/clear kill flag. When a
       // register is used multiple times we only set the kill flag on
-      // the first use.
+      // the first use. Don't set kill flags on undef operands.
       killedRegs.reset();
       for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
         MachineOperand &MO = MI->getOperand(i);
-        if (!MO.isReg() || !MO.isUse()) continue;
+        if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
         unsigned Reg = MO.getReg();
         if ((Reg == 0) || MRI.isReserved(Reg)) continue;
 
@@ -548,9 +558,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
         unsigned Reg = MO.getReg();
         if ((Reg == 0) || MRI.isReserved(Reg)) continue;
 
-        LiveRegs.set(Reg);
-
-        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+        for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+             SubRegs.isValid(); ++SubRegs)
           LiveRegs.set(*SubRegs);
       }
     }
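The FixupKills hunks above recompute kill flags, now skipping undef uses and treating a register together with its sub-registers. A simplified standalone pass over one block illustrating the core idea: scanning away from the block end, the first sighting of a register marks its last use and receives the kill flag. The instruction stream and scan direction here are an invented simplification of the real pass:

#include <cstdio>
#include <set>
#include <vector>

struct Use { int Reg; bool Undef; };
struct Insn { std::vector<Use> Uses; bool Kills[8]; };

int main() {
  // r1 used at I0 and I1, r2 only at I0.
  std::vector<Insn> Block = {
    {{{1, false}, {2, false}}, {}},
    {{{1, false}}, {}},
  };
  std::set<int> SeenBelow;  // registers already seen later in the block
  for (int i = (int)Block.size() - 1; i >= 0; --i)
    for (std::size_t u = 0; u < Block[i].Uses.size(); ++u) {
      const Use &U = Block[i].Uses[u];
      if (U.Undef) continue;  // the fix: never set kill on undef operands
      if (SeenBelow.insert(U.Reg).second)
        Block[i].Kills[U.Reg] = true;  // first sighting = last use = kill
    }
  std::printf("I0 kills r2: %d, I1 kills r1: %d\n",
              Block[0].Kills[2], Block[1].Kills[1]);  // 1, 1
  return 0;
}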
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index e4e18c3..0c5173a 100644
--- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -78,7 +78,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   unsigned Reg = MI->getOperand(0).getReg();
 
   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-    // For virtual regiusters, mark all uses as <undef>, and convert users to
+    // For virtual registers, mark all uses as <undef>, and convert users to
     // implicit-def when possible.
     for (MachineRegisterInfo::use_nodbg_iterator UI =
          MRI->use_nodbg_begin(Reg),
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 9487cbd..b0e494f 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -14,9 +14,6 @@
 // This pass must be run after register allocation. After this pass is
 // executed, it is illegal to construct MO_FrameIndex operands.
 //
-// This pass provides an optional shrink wrapping variant of prolog/epilog
-// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
-//
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pei"
@@ -36,6 +33,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
@@ -47,6 +45,11 @@ using namespace llvm;
 char PEI::ID = 0;
 char &llvm::PrologEpilogCodeInserterID = PEI::ID;
 
+static cl::opt<unsigned>
+WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
+              cl::desc("Warn for stack size bigger than the given"
+                       " number"));
+
 INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
                 "Prologue/Epilogue Insertion", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
@@ -60,6 +63,38 @@ STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
 STATISTIC(NumBytesStackSpace,
           "Number of bytes used for stack in all functions");
 
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<TargetPassConfig>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+  return (MBB && !MBB->empty() && MBB->back().isReturn());
+}
+
+/// Compute the set of return blocks
+void PEI::calculateSets(MachineFunction &Fn) {
+  // Sets used to compute spill, restore placement sets.
+  const std::vector<CalleeSavedInfo> &CSI =
+    Fn.getFrameInfo()->getCalleeSavedInfo();
+
+  // If no CSRs used, we are done.
+  if (CSI.empty())
+    return;
+
+  // Save refs to entry and return blocks.
+  EntryBlock = Fn.begin();
+  for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+       MBB != E; ++MBB)
+    if (isReturnBlock(MBB))
+      ReturnBlocks.push_back(MBB);
+
+  return;
+}
+
 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
 /// frame indexes with appropriate references.
 ///
@@ -87,16 +122,11 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   calculateCalleeSavedRegisters(Fn);
 
   // Determine placement of CSR spill/restore code:
-  // - With shrink wrapping, place spills and restores to tightly
-  //   enclose regions in the Machine CFG of the function where
-  //   they are used.
-  // - Without shink wrapping (default), place all spills in the
-  //   entry block, all restores in return blocks.
-  placeCSRSpillsAndRestores(Fn);
+  // place all spills in the entry block, all restores in return blocks.
+  calculateSets(Fn);
 
   // Add the code to save and restore the callee saved registers
-  if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                       Attribute::Naked))
+  if (!F->hasFnAttribute(Attribute::Naked))
     insertCSRSpillsAndRestores(Fn);
 
   // Allow the target machine to make final modifications to the function
@@ -111,8 +141,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   // called functions. Because of this, calculateCalleeSavedRegisters()
   // must be called before this function in order to set the AdjustsStack
   // and MaxCallFrameSize variables.
-  if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                       Attribute::Naked))
+  if (!F->hasFnAttribute(Attribute::Naked))
    insertPrologEpilogCode(Fn);
 
   // Replace all MO_FrameIndex operands with physical register references
@@ -129,8 +158,15 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   // Clear any vregs created by virtual scavenging.
   Fn.getRegInfo().clearVirtRegs();
 
+  // Warn on stack size when it exceeds the given limit.
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+  if (WarnStackSize.getNumOccurrences() > 0 &&
+      WarnStackSize < MFI->getStackSize())
+    errs() << "warning: Stack size limit exceeded (" << MFI->getStackSize()
+           << ") in " << Fn.getName() << ".\n";
+
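The -warn-stack-size option added above is a plain cl::opt with a sentinel default, so the warning fires only when the flag was actually given. A minimal standalone tool using the same CommandLine pattern, assuming LLVM headers and libSupport are available; the fixed StackSize stands in for MachineFrameInfo::getStackSize():

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static cl::opt<unsigned>
WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
              cl::desc("Warn for stack size bigger than the given number"));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  unsigned StackSize = 4096;  // stand-in for the real frame size
  // getNumOccurrences() distinguishes "flag absent" from "flag set to -1".
  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize)
    errs() << "warning: Stack size limit exceeded (" << StackSize << ")\n";
  return 0;
}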
   delete RS;
-  clearAllSets();
+  ReturnBlocks.clear();
   return true;
 }
 
@@ -208,8 +244,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
     return;
 
   // In Naked functions we aren't going to save any registers.
-  if (F.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                                    Attribute::Naked))
+  if (F.getFunction()->hasFnAttribute(Attribute::Naked))
     return;
 
   std::vector<CalleeSavedInfo> CSI;
@@ -273,7 +308,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
 }
 
 /// insertCSRSpillsAndRestores - Insert spill and restore code for
-/// callee saved registers used in the function, handling shrink wrapping.
+/// callee saved registers used in the function.
 ///
 void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
   // Get callee saved register information.
@@ -291,133 +326,33 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
   const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
   MachineBasicBlock::iterator I;
 
-  if (!ShrinkWrapThisFunction) {
-    // Spill using target interface.
-    I = EntryBlock->begin();
-    if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
-      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-        // Add the callee-saved register as live-in.
-        // It's killed at the spill.
-        EntryBlock->addLiveIn(CSI[i].getReg());
-
-        // Insert the spill to the stack frame.
-        unsigned Reg = CSI[i].getReg();
-        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-        TII.storeRegToStackSlot(*EntryBlock, I, Reg, true,
-                                CSI[i].getFrameIdx(), RC, TRI);
-      }
-    }
-
-    // Restore using target interface.
-    for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
-      MachineBasicBlock* MBB = ReturnBlocks[ri];
-      I = MBB->end(); --I;
-
-      // Skip over all terminator instructions, which are part of the return
-      // sequence.
-      MachineBasicBlock::iterator I2 = I;
-      while (I2 != MBB->begin() && (--I2)->isTerminator())
-        I = I2;
-
-      bool AtStart = I == MBB->begin();
-      MachineBasicBlock::iterator BeforeI = I;
-      if (!AtStart)
-        --BeforeI;
-
-      // Restore all registers immediately before the return and any
-      // terminators that precede it.
-      if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
-        for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-          unsigned Reg = CSI[i].getReg();
-          const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-          TII.loadRegFromStackSlot(*MBB, I, Reg,
-                                   CSI[i].getFrameIdx(),
-                                   RC, TRI);
-          assert(I != MBB->begin() &&
-                 "loadRegFromStackSlot didn't insert any code!");
-          // Insert in reverse order. loadRegFromStackSlot can insert
-          // multiple instructions.
-          if (AtStart)
-            I = MBB->begin();
-          else {
-            I = BeforeI;
-            ++I;
-          }
-        }
-      }
-    }
-    return;
-  }
-
-  // Insert spills.
-  std::vector<CalleeSavedInfo> blockCSI;
-  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
-         BE = CSRSave.end(); BI != BE; ++BI) {
-    MachineBasicBlock* MBB = BI->first;
-    CSRegSet save = BI->second;
-
-    if (save.empty())
-      continue;
-
-    blockCSI.clear();
-    for (CSRegSet::iterator RI = save.begin(),
-           RE = save.end(); RI != RE; ++RI) {
-      blockCSI.push_back(CSI[*RI]);
-    }
-    assert(blockCSI.size() > 0 &&
-           "Could not collect callee saved register info");
-
-    I = MBB->begin();
-
-    // When shrink wrapping, use stack slot stores/loads.
-    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+  // Spill using target interface.
+  I = EntryBlock->begin();
+  if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
       // Add the callee-saved register as live-in.
       // It's killed at the spill.
-      MBB->addLiveIn(blockCSI[i].getReg());
+      EntryBlock->addLiveIn(CSI[i].getReg());
 
       // Insert the spill to the stack frame.
-      unsigned Reg = blockCSI[i].getReg();
+      unsigned Reg = CSI[i].getReg();
       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      TII.storeRegToStackSlot(*MBB, I, Reg,
-                              true,
-                              blockCSI[i].getFrameIdx(),
+      TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(),
                               RC, TRI);
     }
   }
 
-  for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
-         BE = CSRRestore.end(); BI != BE; ++BI) {
-    MachineBasicBlock* MBB = BI->first;
-    CSRegSet restore = BI->second;
-
-    if (restore.empty())
-      continue;
+  // Restore using target interface.
+  for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
+    MachineBasicBlock *MBB = ReturnBlocks[ri];
+    I = MBB->end();
+    --I;
 
-    blockCSI.clear();
-    for (CSRegSet::iterator RI = restore.begin(),
-           RE = restore.end(); RI != RE; ++RI) {
-      blockCSI.push_back(CSI[*RI]);
-    }
-    assert(blockCSI.size() > 0 &&
-           "Could not find callee saved register info");
-
-    // If MBB is empty and needs restores, insert at the _beginning_.
-    if (MBB->empty()) {
-      I = MBB->begin();
-    } else {
-      I = MBB->end();
-      --I;
-
-      // Skip over all terminator instructions, which are part of the
-      // return sequence.
-      if (! I->isTerminator()) {
-        ++I;
-      } else {
-        MachineBasicBlock::iterator I2 = I;
-        while (I2 != MBB->begin() && (--I2)->isTerminator())
-          I = I2;
-      }
-    }
+    // Skip over all terminator instructions, which are part of the return
+    // sequence.
+    MachineBasicBlock::iterator I2 = I;
+    while (I2 != MBB->begin() && (--I2)->isTerminator())
+      I = I2;
 
     bool AtStart = I == MBB->begin();
     MachineBasicBlock::iterator BeforeI = I;
@@ -426,21 +361,21 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
 
     // Restore all registers immediately before the return and any
     // terminators that precede it.
-    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
-      unsigned Reg = blockCSI[i].getReg();
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      TII.loadRegFromStackSlot(*MBB, I, Reg,
-                               blockCSI[i].getFrameIdx(),
-                               RC, TRI);
-      assert(I != MBB->begin() &&
-             "loadRegFromStackSlot didn't insert any code!");
-      // Insert in reverse order. loadRegFromStackSlot can insert
-      // multiple instructions.
-      if (AtStart)
-        I = MBB->begin();
-      else {
-        I = BeforeI;
-        ++I;
+    if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+        unsigned Reg = CSI[i].getReg();
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+        assert(I != MBB->begin() &&
+               "loadRegFromStackSlot didn't insert any code!");
+        // Insert in reverse order. loadRegFromStackSlot can insert
+        // multiple instructions.
+        if (AtStart)
+          I = MBB->begin();
+        else {
+          I = BeforeI;
+          ++I;
+        }
       }
     }
   }
@@ -545,14 +480,18 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   unsigned MaxAlign = MFI->getMaxAlignment();
 
   // Make sure the special register scavenging spill slot is closest to the
-  // frame pointer if a frame pointer is required.
+  // incoming stack pointer if a frame pointer is required and is closer
+  // to the incoming rather than the final stack pointer.
   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
-  if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
-      !RegInfo->needsStackRealignment(Fn)) {
+  bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
+                               TFI.isFPCloseToIncomingSP() &&
+                               RegInfo->useFPForScavengingIndex(Fn) &&
+                               !RegInfo->needsStackRealignment(Fn));
+  if (RS && EarlyScavengingSlots) {
     SmallVector<int, 2> SFIs;
     RS->getScavengingFrameIndices(SFIs);
-    for (SmallVector<int, 2>::iterator I = SFIs.begin(),
-           IE = SFIs.end(); I != IE; ++I)
+    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+           IE = SFIs.end(); I != IE; ++I)
       AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
   }
 
@@ -632,12 +571,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
 
   // Make sure the special register scavenging spill slot is closest to the
   // stack pointer.
-  if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
-             !RegInfo->useFPForScavengingIndex(Fn))) {
+  if (RS && !EarlyScavengingSlots) {
     SmallVector<int, 2> SFIs;
     RS->getScavengingFrameIndices(SFIs);
-    for (SmallVector<int, 2>::iterator I = SFIs.begin(),
-           IE = SFIs.end(); I != IE; ++I)
+    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+           IE = SFIs.end(); I != IE; ++I)
       AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
   }
 
@@ -712,6 +650,40 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
 void PEI::replaceFrameIndices(MachineFunction &Fn) {
   if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
 
+  // Store SPAdj at exit of a basic block.
+  SmallVector<int, 8> SPState;
+  SPState.resize(Fn.getNumBlockIDs());
+  SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+
+  // Iterate over the reachable blocks in DFS order.
+  for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
+       DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
+       DFI != DFE; ++DFI) {
+    int SPAdj = 0;
+    // Check the exit state of the DFS stack predecessor.
+    if (DFI.getPathLength() >= 2) {
+      MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
+      assert(Reachable.count(StackPred) &&
+             "DFS stack predecessor is already visited.\n");
+      SPAdj = SPState[StackPred->getNumber()];
+    }
+    MachineBasicBlock *BB = *DFI;
+    replaceFrameIndices(BB, Fn, SPAdj);
+    SPState[BB->getNumber()] = SPAdj;
+  }
+
+  // Handle the unreachable blocks.
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+    if (Reachable.count(BB))
+      // Already handled in DFS traversal.
+      continue;
+    int SPAdj = 0;
+    replaceFrameIndices(BB, Fn, SPAdj);
+  }
+}
+
+void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+                              int &SPAdj) {
   const TargetMachine &TM = Fn.getTarget();
   assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
   const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
@@ -722,89 +694,85 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
   int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
   int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
 
-  for (MachineFunction::iterator BB = Fn.begin(),
-         E = Fn.end(); BB != E; ++BB) {
-#ifndef NDEBUG
-    int SPAdjCount = 0; // frame setup / destroy count.
-#endif
-    int SPAdj = 0;  // SP offset due to call frame setup / destroy.
-    if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+  if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
 
-    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+  for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
 
-      if (I->getOpcode() == FrameSetupOpcode ||
-          I->getOpcode() == FrameDestroyOpcode) {
-#ifndef NDEBUG
-        // Track whether we see even pairs of them
-        SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1;
-#endif
-        // Remember how much SP has been adjusted to create the call
-        // frame.
-        int Size = I->getOperand(0).getImm();
-
-        if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
-            (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
-          Size = -Size;
-
-        SPAdj += Size;
-
-        MachineBasicBlock::iterator PrevI = BB->end();
-        if (I != BB->begin()) PrevI = prior(I);
-        TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
-
-        // Visit the instructions created by eliminateCallFramePseudoInstr().
-        if (PrevI == BB->end())
-          I = BB->begin();     // The replaced instr was the first in the block.
-        else
-          I = llvm::next(PrevI);
-        continue;
-      }
+    if (I->getOpcode() == FrameSetupOpcode ||
+        I->getOpcode() == FrameDestroyOpcode) {
+      // Remember how much SP has been adjusted to create the call
+      // frame.
+      int Size = I->getOperand(0).getImm();
 
-      MachineInstr *MI = I;
-      bool DoIncr = true;
-      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-        if (!MI->getOperand(i).isFI())
-          continue;
+      if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
+          (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
+        Size = -Size;
 
-        // Some instructions (e.g. inline asm instructions) can have
-        // multiple frame indices and/or cause eliminateFrameIndex
-        // to insert more than one instruction. We need the register
-        // scavenger to go through all of these instructions so that
-        // it can update its register information. We keep the
-        // iterator at the point before insertion so that we can
-        // revisit them in full.
-        bool AtBeginning = (I == BB->begin());
-        if (!AtBeginning) --I;
-
-        // If this instruction has a FrameIndex operand, we need to
-        // use that target machine register info object to eliminate
-        // it.
-        TRI.eliminateFrameIndex(MI, SPAdj, i,
-                                FrameIndexVirtualScavenging ?  NULL : RS);
-
-        // Reset the iterator if we were at the beginning of the BB.
-        if (AtBeginning) {
-          I = BB->begin();
-          DoIncr = false;
-        }
+      SPAdj += Size;
+
+      MachineBasicBlock::iterator PrevI = BB->end();
+      if (I != BB->begin()) PrevI = prior(I);
+      TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
 
-        MI = 0;
-        break;
+      // Visit the instructions created by eliminateCallFramePseudoInstr().
+      if (PrevI == BB->end())
+        I = BB->begin();     // The replaced instr was the first in the block.
+      else
+        I = llvm::next(PrevI);
+      continue;
+    }
+
+    MachineInstr *MI = I;
+    bool DoIncr = true;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      if (!MI->getOperand(i).isFI())
+        continue;
+
+      // Frame indices in debug values are encoded in a target independent
+      // way with simply the frame index and offset rather than any
+      // target-specific addressing mode.
+      if (MI->isDebugValue()) {
+        assert(i == 0 && "Frame indices can only appear as the first "
+                         "operand of a DBG_VALUE machine instruction");
+        unsigned Reg;
+        MachineOperand &Offset = MI->getOperand(1);
+        Offset.setImm(Offset.getImm() +
+                      TFI->getFrameIndexReference(
+                          Fn, MI->getOperand(0).getIndex(), Reg));
+        MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
+        continue;
+      }
 
-      if (DoIncr && I != BB->end()) ++I;
+      // Some instructions (e.g. inline asm instructions) can have
+      // multiple frame indices and/or cause eliminateFrameIndex
+      // to insert more than one instruction. We need the register
+      // scavenger to go through all of these instructions so that
+      // it can update its register information. We keep the
+      // iterator at the point before insertion so that we can
+      // revisit them in full.
+      bool AtBeginning = (I == BB->begin());
+      if (!AtBeginning) --I;
+
+      // If this instruction has a FrameIndex operand, we need to
+      // use that target machine register info object to eliminate
+      // it.
+      TRI.eliminateFrameIndex(MI, SPAdj, i,
+                              FrameIndexVirtualScavenging ?  NULL : RS);
+
+      // Reset the iterator if we were at the beginning of the BB.
+      if (AtBeginning) {
+        I = BB->begin();
+        DoIncr = false;
+      }
 
-      // Update register states.
-      if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
+      MI = 0;
+      break;
     }
 
-    // If we have evenly matched pairs of frame setup / destroy instructions,
-    // make sure the adjustments come out to zero. If we don't have matched
-    // pairs, we can't be sure the missing bit isn't in another basic block
-    // due to a custom inserter playing tricks, so just asserting SPAdj==0
-    // isn't sufficient. See tMOVCC on Thumb1, for example.
-    assert((SPAdjCount || SPAdj == 0) &&
-           "Unbalanced call frame setup / destroy pairs?");
+    if (DoIncr && I != BB->end()) ++I;
+
+    // Update register states.
+    if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
   }
 }
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
index 87fff9a..77cfa2b 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
@@ -1,4 +1,4 @@
-//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===//
+//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,9 +14,6 @@
 // This pass must be run after register allocation. After this pass is
 // executed, it is illegal to construct MO_FrameIndex operands.
 //
-// This pass also implements a shrink wrapping variant of prolog/epilog
-// insertion.
-//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CODEGEN_PEI_H
@@ -54,120 +51,28 @@ namespace llvm {
     // stack frame indexes.
     unsigned MinCSFrameIndex, MaxCSFrameIndex;
 
-    // Analysis info for spill/restore placement.
-    // "CSR": "callee saved register".
-
-    // CSRegSet contains indices into the Callee Saved Register Info
-    // vector built by calculateCalleeSavedRegisters() and accessed
-    // via MF.getFrameInfo()->getCalleeSavedInfo().
-    typedef SparseBitVector<> CSRegSet;
-
-    // CSRegBlockMap maps MachineBasicBlocks to sets of callee
-    // saved register indices.
-    typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
-
-    // Set and maps for computing CSR spill/restore placement:
-    //  used in function                (UsedCSRegs)
-    //  used in a basic block           (CSRUsed)
-    //  anticipatable in a basic block  (Antic{In,Out})
-    //  available in a basic block      (Avail{In,Out})
-    //  to be spilled at the entry to a basic block  (CSRSave)
-    //  to be restored at the end of a basic block   (CSRRestore)
-    CSRegSet UsedCSRegs;
-    CSRegBlockMap CSRUsed;
-    CSRegBlockMap AnticIn, AnticOut;
-    CSRegBlockMap AvailIn, AvailOut;
-    CSRegBlockMap CSRSave;
-    CSRegBlockMap CSRRestore;
-
     // Entry and return blocks of the current function.
     MachineBasicBlock* EntryBlock;
     SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
 
-    // Map of MBBs to top level MachineLoops.
-    DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
-
-    // Flag to control shrink wrapping per-function:
-    // may choose to skip shrink wrapping for certain
-    // functions.
-    bool ShrinkWrapThisFunction;
-
     // Flag to control whether to use the register scavenger to resolve
    // frame index materialization registers. Set according to
     // TRI->requiresFrameIndexScavenging() for the current function.
     bool FrameIndexVirtualScavenging;
 
-#ifndef NDEBUG
-    // Machine function handle.
-    MachineFunction* MF;
-
-    // Flag indicating that the current function
-    // has at least one "short" path in the machine
-    // CFG from the entry block to an exit block.
-    bool HasFastExitPath;
-#endif
-
-    bool calculateSets(MachineFunction &Fn);
-    bool calcAnticInOut(MachineBasicBlock* MBB);
-    bool calcAvailInOut(MachineBasicBlock* MBB);
-    void calculateAnticAvail(MachineFunction &Fn);
-    bool addUsesForMEMERegion(MachineBasicBlock* MBB,
-                              SmallVector<MachineBasicBlock*, 4>& blks);
-    bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
-    bool calcSpillPlacements(MachineBasicBlock* MBB,
-                             SmallVector<MachineBasicBlock*, 4> &blks,
-                             CSRegBlockMap &prevSpills);
-    bool calcRestorePlacements(MachineBasicBlock* MBB,
-                               SmallVector<MachineBasicBlock*, 4> &blks,
-                               CSRegBlockMap &prevRestores);
-    void placeSpillsAndRestores(MachineFunction &Fn);
-    void placeCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateSets(MachineFunction &Fn);
     void calculateCallsInformation(MachineFunction &Fn);
     void calculateCalleeSavedRegisters(MachineFunction &Fn);
     void insertCSRSpillsAndRestores(MachineFunction &Fn);
     void calculateFrameObjectOffsets(MachineFunction &Fn);
     void replaceFrameIndices(MachineFunction &Fn);
+    void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+                             int &SPAdj);
     void scavengeFrameVirtualRegs(MachineFunction &Fn);
     void insertPrologEpilogCode(MachineFunction &Fn);
 
-    // Initialize DFA sets, called before iterations.
-    void clearAnticAvailSets();
-    // Clear all sets constructed by shrink wrapping.
-    void clearAllSets();
-
-    // Initialize all shrink wrapping data.
-    void initShrinkWrappingInfo();
-
-    // Convienences for dealing with machine loops.
-    MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
-    MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
-
-    // Propgate CSRs used in MBB to all MBBs of loop LP.
-    void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
-
     // Convenience for recognizing return blocks.
     bool isReturnBlock(MachineBasicBlock* MBB);
-
-#ifndef NDEBUG
-    // Debugging methods.
-
-    // Mark this function as having fast exit paths.
-    void findFastExitPath();
-
-    // Verify placement of spills/restores.
-    void verifySpillRestorePlacement();
-
-    std::string getBasicBlockName(const MachineBasicBlock* MBB);
-    std::string stringifyCSRegSet(const CSRegSet& s);
-    void dumpSet(const CSRegSet& s);
-    void dumpUsed(MachineBasicBlock* MBB);
-    void dumpAllUsed();
-    void dumpSets(MachineBasicBlock* MBB);
-    void dumpSets1(MachineBasicBlock* MBB);
-    void dumpAllSets();
-    void dumpSRSets();
-#endif
-
   };
 } // End llvm namespace
 #endif
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
index c035590..293e306 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -43,13 +43,16 @@ static cl::opt<bool, true>
 VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
                cl::desc("Verify during register allocation"));
 
-const char *RegAllocBase::TimerGroupName = "Register Allocation";
+const char RegAllocBase::TimerGroupName[] = "Register Allocation";
 bool RegAllocBase::VerifyEnabled = false;
 
 //===----------------------------------------------------------------------===//
 //                         RegAllocBase Implementation
 //===----------------------------------------------------------------------===//
 
+// Pin the vtable to this file.
+void RegAllocBase::anchor() {}
+
 void RegAllocBase::init(VirtRegMap &vrm,
                         LiveIntervals &lis,
                         LiveRegMatrix &mat) {
@@ -99,14 +102,13 @@ void RegAllocBase::allocatePhysRegs() {
     // result from splitting.
     DEBUG(dbgs() << "\nselectOrSplit "
                  << MRI->getRegClass(VirtReg->reg)->getName()
-                 << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n');
-    typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+                 << ':' << *VirtReg << '\n');
+    typedef SmallVector<unsigned, 4> VirtRegVec;
     VirtRegVec SplitVRegs;
     unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
 
     if (AvailablePhysReg == ~0u) {
       // selectOrSplit failed to find a register!
-      const char *Msg = "ran out of registers during register allocation";
       // Probably caused by an inline asm.
       MachineInstr *MI;
       for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
@@ -114,9 +116,9 @@ void RegAllocBase::allocatePhysRegs() {
         if (MI->isInlineAsm())
           break;
       if (MI)
-        MI->emitError(Msg);
+        MI->emitError("inline assembly requires more registers than available");
       else
-        report_fatal_error(Msg);
+        report_fatal_error("ran out of registers during register allocation");
       // Keep going after reporting the error.
       VRM->assignVirt2Phys(VirtReg->reg,
                  RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
@@ -128,7 +130,7 @@ void RegAllocBase::allocatePhysRegs() {
 
     for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
          I != E; ++I) {
-      LiveInterval *SplitVirtReg = *I;
+      LiveInterval *SplitVirtReg = &LIS->getInterval(*I);
       assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
       if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
         DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h
index 064e40f..c17a8d9 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBase.h
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h
@@ -38,7 +38,7 @@
 #define LLVM_CODEGEN_REGALLOCBASE
 
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 
 namespace llvm {
@@ -57,6 +57,7 @@ class Spiller;
 /// live range splitting. They must also override enqueue/dequeue to provide an
 /// assignment order.
 class RegAllocBase {
+  virtual void anchor();
 protected:
   const TargetRegisterInfo *TRI;
   MachineRegisterInfo *MRI;
@@ -90,10 +91,10 @@ protected:
   // or new set of split live virtual registers. It is up to the splitter to
   // converge quickly toward fully spilled live ranges.
   virtual unsigned selectOrSplit(LiveInterval &VirtReg,
-                                 SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
+                                 SmallVectorImpl<unsigned> &splitLVRs) = 0;
 
   // Use this group name for NamedRegionTimer.
-  static const char *TimerGroupName;
+  static const char TimerGroupName[];
 
 public:
   /// VerifyEnabled - True when -verify-regalloc is given.
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
index 7fcfe9e..6768e45 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -101,7 +102,7 @@ public:
   }
 
   virtual unsigned selectOrSplit(LiveInterval &VirtReg,
-                                 SmallVectorImpl<LiveInterval*> &SplitVRegs);
+                                 SmallVectorImpl<unsigned> &SplitVRegs);
 
   /// Perform register allocation.
   virtual bool runOnMachineFunction(MachineFunction &mf);
@@ -110,7 +111,7 @@ public:
   // that interfere with the most recently queried lvr. Return true if spilling
   // was successful, and append any new spilled/split intervals to splitLVRs.
   bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
-                          SmallVectorImpl<LiveInterval*> &SplitVRegs);
+                          SmallVectorImpl<unsigned> &SplitVRegs);
 
   static char ID;
 };
@@ -125,7 +126,6 @@ RABasic::RABasic(): MachineFunctionPass(ID) {
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
   initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
-  initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
   initializeLiveStacksPass(*PassRegistry::getPassRegistry());
   initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
   initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
@@ -142,9 +142,10 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<SlotIndexes>();
   AU.addRequired<LiveDebugVariables>();
   AU.addPreserved<LiveDebugVariables>();
-  AU.addRequired<CalculateSpillWeights>();
   AU.addRequired<LiveStacks>();
   AU.addPreserved<LiveStacks>();
+  AU.addRequired<MachineBlockFrequencyInfo>();
+  AU.addPreserved<MachineBlockFrequencyInfo>();
   AU.addRequiredID(MachineDominatorsID);
   AU.addPreservedID(MachineDominatorsID);
   AU.addRequired<MachineLoopInfo>();
@@ -165,7 +166,7 @@ void RABasic::releaseMemory() {
 // that interfere with VirtReg. The newly spilled or split live intervals are
 // returned by appending them to SplitVRegs.
 bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
-                                 SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+                                 SmallVectorImpl<unsigned> &SplitVRegs) {
   // Record each interference and determine if all are spillable before mutating
   // either the union or live intervals.
   SmallVector<LiveInterval*, 8> Intfs;
@@ -219,7 +220,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
 // minimal, there is no value in caching them outside the scope of
 // selectOrSplit().
 unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
-                                SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+                                SmallVectorImpl<unsigned> &SplitVRegs) {
   // Populate a list of physical register spill candidates.
   SmallVector<unsigned, 8> PhysRegSpillCands;
 
@@ -276,6 +277,11 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
   RegAllocBase::init(getAnalysis<VirtRegMap>(),
                      getAnalysis<LiveIntervals>(),
                      getAnalysis<LiveRegMatrix>());
+
+  calculateSpillWeightsAndHints(*LIS, *MF,
+                                getAnalysis<MachineLoopInfo>(),
+                                getAnalysis<MachineBlockFrequencyInfo>());
+
   SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
 
   allocatePhysRegs();
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index bb9c05c..e92dbd2 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -144,7 +144,7 @@ namespace {
     // not be erased.
     bool isBulkSpilling;
 
-    enum {
+    enum LLVM_ENUM_INT_TYPE(unsigned) {
       spillClean = 1,
       spillDirty = 100,
      spillImpossible = ~0u
@@ -293,29 +293,26 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
   // If this register is used by DBG_VALUE then insert new DBG_VALUE to
   // identify spilled location as the place to find corresponding variable's
   // value.
-  SmallVector<MachineInstr *, 4> &LRIDbgValues =
+  SmallVectorImpl<MachineInstr *> &LRIDbgValues =
     LiveDbgValueMap[LRI->VirtReg];
   for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
     MachineInstr *DBG = LRIDbgValues[li];
-    const MDNode *MDPtr =
-      DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
-    int64_t Offset = 0;
-    if (DBG->getOperand(1).isImm())
-      Offset = DBG->getOperand(1).getImm();
+    const MDNode *MDPtr = DBG->getOperand(2).getMetadata();
+    bool IsIndirect = DBG->isIndirectDebugValue();
+    uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0;
     DebugLoc DL;
     if (MI == MBB->end()) {
       // If MI is at basic block end then use last instruction's location.
       MachineBasicBlock::iterator EI = MI;
       DL = (--EI)->getDebugLoc();
-    }
-    else
+    } else
       DL = MI->getDebugLoc();
-    if (MachineInstr *NewDV =
-        TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) {
-      MachineBasicBlock *MBB = DBG->getParent();
-      MBB->insert(MI, NewDV);
-      DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
-    }
+    MachineBasicBlock *MBB = DBG->getParent();
+    MachineInstr *NewDV =
+        BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE))
+            .addFrameIndex(FI).addImm(Offset).addMetadata(MDPtr);
+    (void)NewDV;
+    DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
   }
   // Now this register is spilled there should not be any DBG_VALUE
   // pointing to this register because they are all pointing to spilled value
@@ -572,7 +569,10 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
   }
 
   // Nothing we can do. Report an error and keep going with a bad allocation.
-  MI->emitError("ran out of registers during register allocation");
+  if (MI->isInlineAsm())
+    MI->emitError("inline assembly requires more registers than available");
+  else
+    MI->emitError("ran out of registers during register allocation");
   definePhysReg(MI, *AO.begin(), regFree);
   return assignVirtToPhysReg(VirtReg, *AO.begin());
 }
@@ -859,25 +859,21 @@ void RAFast::AllocateBasicBlock() {
           }
           else {
             // Modify DBG_VALUE now that the value is in a spill slot.
-            int64_t Offset = MI->getOperand(1).getImm();
+            bool IsIndirect = MI->isIndirectDebugValue();
+            uint64_t Offset = IsIndirect ?
+                MI->getOperand(1).getImm() : 0;
             const MDNode *MDPtr =
               MI->getOperand(MI->getNumOperands()-1).getMetadata();
             DebugLoc DL = MI->getDebugLoc();
-            if (MachineInstr *NewDV =
-                TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
-              DEBUG(dbgs() << "Modifying debug info due to spill:" <<
-                    "\t" << *MI);
-              MachineBasicBlock *MBB = MI->getParent();
-              MBB->insert(MBB->erase(MI), NewDV);
-              // Scan NewDV operands from the beginning.
-              MI = NewDV;
-              ScanDbgValue = true;
-              break;
-            } else {
-              // We can't allocate a physreg for a DebugValue; sorry!
-              DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
-              MO.setReg(0);
-            }
+            MachineBasicBlock *MBB = MI->getParent();
+            MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL,
+                                          TII->get(TargetOpcode::DBG_VALUE))
+                .addFrameIndex(SS).addImm(Offset).addMetadata(MDPtr);
+            DEBUG(dbgs() << "Modifying debug info due to spill:"
+                         << "\t" << *NewDV);
+            // Scan NewDV operands from the beginning.
+            MI = NewDV;
+            ScanDbgValue = true;
+            break;
           }
         }
         LiveDbgValueMap[Reg].push_back(MI);
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 9eed1fc..c08d955 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -29,6 +29,7 @@
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -71,6 +72,7 @@ class RAGreedy : public MachineFunctionPass,
 
   // analyses
   SlotIndexes *Indexes;
+  MachineBlockFrequencyInfo *MBFI;
   MachineDominatorTree *DomTree;
   MachineLoopInfo *Loops;
   EdgeBundles *Bundles;
@@ -118,7 +120,9 @@ class RAGreedy : public MachineFunctionPass,
     RS_Done
   };
 
+#ifndef NDEBUG
   static const char *const StageName[];
+#endif
 
   // RegInfo - Keep additional information about each live range.
   struct RegInfo {
@@ -145,7 +149,7 @@ class RAGreedy : public MachineFunctionPass,
   void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
     ExtraRegInfo.resize(MRI->getNumVirtRegs());
     for (;Begin != End; ++Begin) {
-      unsigned Reg = (*Begin)->reg;
+      unsigned Reg = *Begin;
       if (ExtraRegInfo[Reg].Stage == RS_New)
         ExtraRegInfo[Reg].Stage = NewStage;
     }
@@ -158,6 +162,8 @@ class RAGreedy : public MachineFunctionPass,
 
     EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
 
+    bool isMax() const { return BrokenHints == ~0u; }
+
     bool operator<(const EvictionCost &O) const {
       if (BrokenHints != O.BrokenHints)
         return BrokenHints < O.BrokenHints;
@@ -216,7 +222,7 @@ class RAGreedy : public MachineFunctionPass,
   /// class.
   SmallVector<GlobalSplitCandidate, 32> GlobalCand;
 
-  enum { NoCand = ~0u };
+  enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u };
 
   /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
   /// NoCand which indicates the stack interval.
@@ -237,7 +243,7 @@ public:
   virtual void enqueue(LiveInterval *LI);
   virtual LiveInterval *dequeue();
   virtual unsigned selectOrSplit(LiveInterval&,
-                                 SmallVectorImpl<LiveInterval*>&);
+                                 SmallVectorImpl<unsigned>&);
 
   /// Perform register allocation.
   virtual bool runOnMachineFunction(MachineFunction &mf);
 
@@ -249,33 +255,34 @@ private:
   void LRE_WillShrinkVirtReg(unsigned);
   void LRE_DidCloneVirtReg(unsigned, unsigned);
 
-  float calcSpillCost();
-  bool addSplitConstraints(InterferenceCache::Cursor, float&);
+  BlockFrequency calcSpillCost();
+  bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
   void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
   void growRegion(GlobalSplitCandidate &Cand);
-  float calcGlobalSplitCost(GlobalSplitCandidate&);
+  BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&);
   bool calcCompactRegion(GlobalSplitCandidate&);
   void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
   void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+  unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
   bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
   bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
   void evictInterference(LiveInterval&, unsigned,
-                         SmallVectorImpl<LiveInterval*>&);
+                         SmallVectorImpl<unsigned>&);
   unsigned tryAssign(LiveInterval&, AllocationOrder&,
-                     SmallVectorImpl<LiveInterval*>&);
+                     SmallVectorImpl<unsigned>&);
   unsigned tryEvict(LiveInterval&, AllocationOrder&,
-                    SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
+                    SmallVectorImpl<unsigned>&, unsigned = ~0u);
   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
-                          SmallVectorImpl<LiveInterval*>&);
+                          SmallVectorImpl<unsigned>&);
   unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
-                         SmallVectorImpl<LiveInterval*>&);
+                         SmallVectorImpl<unsigned>&);
   unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
-                               SmallVectorImpl<LiveInterval*>&);
+                               SmallVectorImpl<unsigned>&);
   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
-                         SmallVectorImpl<LiveInterval*>&);
+                         SmallVectorImpl<unsigned>&);
   unsigned trySplit(LiveInterval&, AllocationOrder&,
-                    SmallVectorImpl<LiveInterval*>&);
+                    SmallVectorImpl<unsigned>&);
 };
 } // end anonymous namespace
 
@@ -308,7 +315,6 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
   initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
-  initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
   initializeLiveStacksPass(*PassRegistry::getPassRegistry());
   initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
   initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
@@ -320,6 +326,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
 
 void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
+  AU.addRequired<MachineBlockFrequencyInfo>();
+  AU.addPreserved<MachineBlockFrequencyInfo>();
   AU.addRequired<AliasAnalysis>();
   AU.addPreserved<AliasAnalysis>();
   AU.addRequired<LiveIntervals>();
@@ -330,7 +338,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<LiveDebugVariables>();
   AU.addRequired<LiveStacks>();
   AU.addPreserved<LiveStacks>();
-  AU.addRequired<CalculateSpillWeights>();
   AU.addRequired<MachineDominatorTree>();
   AU.addPreserved<MachineDominatorTree>();
   AU.addRequired<MachineLoopInfo>();
@@ -407,15 +414,28 @@ void RAGreedy::enqueue(LiveInterval *LI) {
     // everything else has been allocated.
     Prio = Size;
   } else {
-    // Everything is allocated in long->short order. Long ranges that don't fit
-    // should be spilled (or split) ASAP so they don't create interference.
-    Prio = (1u << 31) + Size;
+    if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() &&
+        LIS->intervalIsInOneMBB(*LI)) {
+      // Allocate original local ranges in linear instruction order. Since they
+      // are singly defined, this produces optimal coloring in the absence of
+      // global interference and other constraints.
+      Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
+    }
+    else {
+      // Allocate global and split ranges in long->short order. Long ranges that
+      // don't fit should be spilled (or split) ASAP so they don't create
+      // interference.  Mark a bit to prioritize global above local ranges.
+      Prio = (1u << 29) + Size;
+    }
+    // Mark a higher bit to prioritize global and local above RS_Split.
+    Prio |= (1u << 31);
 
     // Boost ranges that have a physical register hint.
     if (VRM->hasKnownPreference(Reg))
       Prio |= (1u << 30);
   }
-
+  // The virtual register number is a tie breaker for same-sized ranges.
+  // Give lower vreg numbers higher priority to assign them first.
   Queue.push(std::make_pair(Prio, ~Reg));
 }
 
@@ -435,7 +455,7 @@ LiveInterval *RAGreedy::dequeue() {
 
 /// tryAssign - Try to assign VirtReg to an available register.
 unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
                              AllocationOrder &Order,
-                             SmallVectorImpl<LiveInterval*> &NewVRegs) {
+                             SmallVectorImpl<unsigned> &NewVRegs) {
   Order.rewind();
   unsigned PhysReg;
   while ((PhysReg = Order.next()))
@@ -476,6 +496,31 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
 //                         Interference eviction
 //===----------------------------------------------------------------------===//
 
+unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
+  AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+  unsigned PhysReg;
+  while ((PhysReg = Order.next())) {
+    if (PhysReg == PrevReg)
+      continue;
+
+    MCRegUnitIterator Units(PhysReg, TRI);
+    for (; Units.isValid(); ++Units) {
+      // Instantiate a "subquery", not to be confused with the Queries array.
+      LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]);
+      if (subQ.checkInterference())
+        break;
+    }
+    // If no units have interference, break out with the current PhysReg.
+    if (!Units.isValid())
+      break;
+  }
+  if (PhysReg)
+    DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
+          << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI)
+          << '\n');
+  return PhysReg;
+}
+
 /// shouldEvict - determine if A should evict the assigned live range B. The
 /// eviction policy defined by this function together with the allocation order
 /// defined by enqueue() decides which registers ultimately end up being split
@@ -516,6 +561,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
   if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
     return false;
 
+  bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
+
   // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
   // involved in an eviction before. If a cascade number was assigned, deny
   // evicting anything with the same or a newer cascade number. This prevents
@@ -569,8 +616,17 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
       // Abort if this would be too expensive.
       if (!(Cost < MaxCost))
         return false;
+      if (Urgent)
+        continue;
+      // If !MaxCost.isMax(), then we're just looking for a cheap register.
+      // Evicting another local live range in this case could lead to suboptimal
+      // coloring.
+      if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
+          !canReassign(*Intf, PhysReg)) {
+        return false;
+      }
       // Finally, apply the eviction policy for non-urgent evictions.
-      if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+      if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
         return false;
     }
   }
@@ -582,7 +638,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
 
 /// evictInterference - Evict any interfering registers that prevent VirtReg
 /// from being assigned to Physreg. This assumes that canEvictInterference
 /// returned true.
 void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
-                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+                                 SmallVectorImpl<unsigned> &NewVRegs) {
   // Make sure that VirtReg has a cascade number, and assign that cascade
   // number to every evicted register. These live ranges can then only be
   // evicted by a newer cascade, preventing infinite loops.
@@ -614,7 +670,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
            "Cannot decrease cascade number, illegal eviction");
     ExtraRegInfo[Intf->reg].Cascade = Cascade;
     ++NumEvicted;
-    NewVRegs.push_back(Intf);
+    NewVRegs.push_back(Intf->reg);
   }
 }
 
@@ -624,7 +680,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
 /// @return  Physreg to assign VirtReg, or 0.
 unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                             AllocationOrder &Order,
-                            SmallVectorImpl<LiveInterval*> &NewVRegs,
+                            SmallVectorImpl<unsigned> &NewVRegs,
                             unsigned CostPerUseLimit) {
   NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
 
@@ -699,12 +755,12 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
 /// that all preferences in SplitConstraints are met.
 /// Return false if there are no bundles with positive bias.
 bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
-                                   float &Cost) {
+                                   BlockFrequency &Cost) {
   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
 
   // Reset interference dependent info.
   SplitConstraints.resize(UseBlocks.size());
-  float StaticCost = 0;
+  BlockFrequency StaticCost = 0;
   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
     SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
@@ -713,7 +769,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
     Intf.moveToBlock(BC.Number);
     BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
     BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
-    BC.ChangesValue = BI.FirstDef;
+    BC.ChangesValue = BI.FirstDef.isValid();
 
     if (!Intf.hasInterference())
       continue;
@@ -742,8 +798,8 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
     }
 
     // Accumulate the total frequency of inserted spill code.
-    if (Ins)
-      StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+    while (Ins--)
+      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
   }
   Cost = StaticCost;
 
@@ -876,7 +932,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
   SpillPlacer->prepare(Cand.LiveBundles);
 
   // The static split cost will be zero since Cand.Intf reports no interference.
-  float Cost;
+  BlockFrequency Cost;
   if (!addSplitConstraints(Cand.Intf, Cost)) {
     DEBUG(dbgs() << ", none.\n");
     return false;
   }
@@ -901,8 +957,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
 
 /// calcSpillCost - Compute how expensive it would be to split the live range in
 /// SA around all use blocks instead of forming bundle regions.
-float RAGreedy::calcSpillCost() {
-  float Cost = 0;
+BlockFrequency RAGreedy::calcSpillCost() {
+  BlockFrequency Cost = 0;
   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
@@ -921,8 +977,8 @@ float RAGreedy::calcSpillCost() {
 /// pattern in LiveBundles. This cost should be added to the local cost of the
 /// interference pattern in SplitConstraints.
 ///
-float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
-  float GlobalCost = 0;
+BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
+  BlockFrequency GlobalCost = 0;
   const BitVector &LiveBundles = Cand.LiveBundles;
   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
@@ -936,8 +992,8 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
       Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
     if (BI.LiveOut)
       Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
-    if (Ins)
-      GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+    while (Ins--)
+      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
   }
 
   for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
@@ -949,8 +1005,10 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
     if (RegIn && RegOut) {
       // We need double spill code if this block has interference.
       Cand.Intf.moveToBlock(Number);
-      if (Cand.Intf.hasInterference())
-        GlobalCost += 2*SpillPlacer->getBlockFrequency(Number);
+      if (Cand.Intf.hasInterference()) {
+        GlobalCost += SpillPlacer->getBlockFrequency(Number);
+        GlobalCost += SpillPlacer->getBlockFrequency(Number);
+      }
       continue;
     }
     // live-in / stack-out or stack-in live-out.
@@ -1067,7 +1125,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
 
   SmallVector<unsigned, 8> IntvMap;
   SE->finish(&IntvMap);
-  DebugVars->splitRegister(Reg, LREdit.regs());
+  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
 
   ExtraRegInfo.resize(MRI->getNumVirtRegs());
   unsigned OrigBlocks = SA->getNumLiveBlocks();
@@ -1078,7 +1136,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
   // - Block-local splits are candidates for local splitting.
   // - DCE leftovers should go back on the queue.
   for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
-    LiveInterval &Reg = *LREdit.get(i);
+    LiveInterval &Reg = LIS->getInterval(LREdit.get(i));
 
     // Ignore old intervals from DCE.
     if (getStage(Reg) != RS_New)
@@ -1112,10 +1170,10 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
 }
 
 unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
-                                  SmallVectorImpl<LiveInterval*> &NewVRegs) {
+                                  SmallVectorImpl<unsigned> &NewVRegs) {
   unsigned NumCands = 0;
   unsigned BestCand = NoCand;
-  float BestCost;
+  BlockFrequency BestCost;
   SmallVector<unsigned, 8> UsedCands;
 
   // Check if we can split this live range around a compact region.
@@ -1123,11 +1181,11 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   if (HasCompact) {
     // Yes, keep GlobalCand[0] as the compact region candidate.
     NumCands = 1;
-    BestCost = HUGE_VALF;
+    BestCost = BlockFrequency::getMaxFrequency();
   } else {
     // No benefit from the compact region, our fallback will be per-block
     // splitting. Make sure we find a solution that is cheaper than spilling.
-    BestCost = Hysteresis * calcSpillCost();
+    BestCost = calcSpillCost();
     DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
   }
 
@@ -1157,7 +1215,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     Cand.reset(IntfCache, PhysReg);
 
     SpillPlacer->prepare(Cand.LiveBundles);
-    float Cost;
+    BlockFrequency Cost;
     if (!addSplitConstraints(Cand.Intf, Cost)) {
       DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
       continue;
@@ -1193,7 +1251,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     });
     if (Cost < BestCost) {
       BestCand = NumCands;
-      BestCost = Hysteresis * Cost; // Prevent rounding effects.
+      BestCost = Cost;
     }
     ++NumCands;
   }
@@ -1247,7 +1305,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
 /// creates a lot of local live ranges, that will be split by tryLocalSplit if
 /// they don't allocate.
 unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
-                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+                                 SmallVectorImpl<unsigned> &NewVRegs) {
   assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
   unsigned Reg = VirtReg.reg;
   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
@@ -1268,14 +1326,14 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   SE->finish(&IntvMap);
 
   // Tell LiveDebugVariables about the new ranges.
-  DebugVars->splitRegister(Reg, LREdit.regs());
+  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
 
   ExtraRegInfo.resize(MRI->getNumVirtRegs());
 
   // Sort out the new intervals created by splitting. The remainder interval
   // goes straight to spilling, the new local ranges get to stay RS_New.
   for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
-    LiveInterval &LI = *LREdit.get(i);
+    LiveInterval &LI = LIS->getInterval(LREdit.get(i));
     if (getStage(LI) == RS_New && IntvMap[i] == 0)
       setStage(LI, RS_Spill);
   }
@@ -1299,7 +1357,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
 /// This is similar to spilling to a larger register class.
 unsigned
 RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
-                              SmallVectorImpl<LiveInterval*> &NewVRegs) {
+                              SmallVectorImpl<unsigned> &NewVRegs) {
   // There is no point to this if there are no larger sub-classes.
   if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg)))
     return 0;
@@ -1335,7 +1393,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
 
   SmallVector<unsigned, 8> IntvMap;
   SE->finish(&IntvMap);
-  DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+  DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
   ExtraRegInfo.resize(MRI->getNumVirtRegs());
 
   // Assign all new registers to RS_Spill. This was the last chance.
@@ -1406,9 +1464,9 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
 
   // Add fixed interference.
   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
-    const LiveInterval &LI = LIS->getRegUnit(*Units);
-    LiveInterval::const_iterator I = LI.find(StartIdx);
-    LiveInterval::const_iterator E = LI.end();
+    const LiveRange &LR = LIS->getRegUnit(*Units);
+    LiveRange::const_iterator I = LR.find(StartIdx);
+    LiveRange::const_iterator E = LR.end();
 
     // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
     for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
@@ -1419,7 +1477,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
         break;
 
       for (; Gap != NumGaps; ++Gap) {
-        GapWeight[Gap] = HUGE_VALF;
+        GapWeight[Gap] = llvm::huge_valf;
         if (Uses[Gap+1].getBaseIndex() >= I->end)
           break;
       }
@@ -1433,7 +1491,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
 /// basic block.
 ///
 unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
-                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+                                 SmallVectorImpl<unsigned> &NewVRegs) {
   assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
 
@@ -1511,7 +1569,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   unsigned BestAfter = 0;
   float BestDiff = 0;
 
-  const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber());
+  const float blockFreq =
+    SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
+    (1.0f / BlockFrequency::getEntryFrequency());
   SmallVector<float, 8> GapWeight;
 
   Order.rewind();
@@ -1523,7 +1583,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     // Remove any gaps with regmask clobbers.
     if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
       for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
-        GapWeight[RegMaskGaps[i]] = HUGE_VALF;
+        GapWeight[RegMaskGaps[i]] = llvm::huge_valf;
 
     // Try to find the best sequence of gaps to close.
     // The new spill weight must be larger than any gap interference.
@@ -1558,7 +1618,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
       // Legally, without causing looping?
       bool Legal = !ProgressRequired || NewGaps < NumGaps;
 
-      if (Legal && MaxGap < HUGE_VALF) {
+      if (Legal && MaxGap < llvm::huge_valf) {
         // Estimate the new spill weight. Each instruction reads or writes the
         // register. Conservatively assume there are no read-modify-write
         // instructions.
@@ -1625,7 +1685,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     SE->useIntv(SegStart, SegStop);
   SmallVector<unsigned, 8> IntvMap;
   SE->finish(&IntvMap);
-  DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+  DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
 
   // If the new range has the same number of instructions as before, mark it as
   // RS_Split2 so the next split will be forced to make progress. Otherwise,
@@ -1638,8 +1698,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     assert(!ProgressRequired && "Didn't make progress when it was required.");
     for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
       if (IntvMap[i] == 1) {
-        setStage(*LREdit.get(i), RS_Split2);
-        DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
+        setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
+        DEBUG(dbgs() << PrintReg(LREdit.get(i)));
       }
     DEBUG(dbgs() << '\n');
   }
 
@@ -1656,7 +1716,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
 /// assignable.
 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
 unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
-                            SmallVectorImpl<LiveInterval*>&NewVRegs) {
+                            SmallVectorImpl<unsigned>&NewVRegs) {
   // Ranges must be Split2 or less.
   if (getStage(VirtReg) >= RS_Spill)
     return 0;
 
@@ -1705,7 +1765,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
 //===----------------------------------------------------------------------===//
 
 unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
-                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+                                 SmallVectorImpl<unsigned> &NewVRegs) {
   // First try assigning a free register.
   AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
   if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
@@ -1730,7 +1790,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
   if (Stage < RS_Split) {
     setStage(VirtReg, RS_Split);
     DEBUG(dbgs() << "wait for second round\n");
-    NewVRegs.push_back(&VirtReg);
+    NewVRegs.push_back(VirtReg.reg);
     return 0;
   }
 
@@ -1770,6 +1830,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
                      getAnalysis<LiveIntervals>(),
                      getAnalysis<LiveRegMatrix>());
   Indexes = &getAnalysis<SlotIndexes>();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   DomTree = &getAnalysis<MachineDominatorTree>();
   SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
   Loops = &getAnalysis<MachineLoopInfo>();
@@ -1777,8 +1838,12 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
   SpillPlacer = &getAnalysis<SpillPlacement>();
   DebugVars = &getAnalysis<LiveDebugVariables>();
 
+  calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI);
+
+  DEBUG(LIS->dump());
+
   SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
-  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
+  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI));
   ExtraRegInfo.clear();
   ExtraRegInfo.resize(MRI->getNumVirtRegs());
   NextCascade = 1;
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 15a88e2..88c8201 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -40,6 +40,7 @@
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -94,9 +95,7 @@ public:
     : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) {
     initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
     initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
-    initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
     initializeLiveStacksPass(*PassRegistry::getPassRegistry());
-    initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
     initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
   }
 
@@ -130,8 +129,8 @@ private:
   const TargetMachine *tm;
   const TargetRegisterInfo *tri;
   const TargetInstrInfo *tii;
-  const MachineLoopInfo *loopInfo;
   MachineRegisterInfo *mri;
+  const MachineBlockFrequencyInfo *mbfi;
 
   OwningPtr<Spiller> spiller;
   LiveIntervals *lis;
@@ -158,13 +157,13 @@ char RegAllocPBQP::ID = 0;
 
 } // End anonymous namespace.
-unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const {
+unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::NodeId node) const {
   Node2VReg::const_iterator vregItr = node2VReg.find(node);
   assert(vregItr != node2VReg.end() && "No vreg for node.");
   return vregItr->second;
 }
 
-PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+PBQP::Graph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
   VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
   assert(nodeItr != vreg2Node.end() && "No node for vreg.");
   return nodeItr->second;
@@ -188,7 +187,7 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
 }
 
 PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
-                                  const MachineLoopInfo *loopInfo,
+                                  const MachineBlockFrequencyInfo *mbfi,
                                   const RegSet &vregs) {
 
   LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
@@ -247,7 +246,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
     }
 
     // Construct the node.
-    PBQP::Graph::NodeItr node =
+    PBQP::Graph::NodeId node =
       g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
 
     // Record the mapping and allowed set in the problem.
@@ -273,7 +272,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
       assert(!l2.empty() && "Empty interval in vreg set?");
       if (l1.overlaps(l2)) {
-        PBQP::Graph::EdgeItr edge =
+        PBQP::Graph::EdgeId edge =
           g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
                     PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
 
@@ -313,10 +312,10 @@ void PBQPBuilder::addInterferenceCosts(
 
 PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
                                                 const LiveIntervals *lis,
-                                                const MachineLoopInfo *loopInfo,
+                                                const MachineBlockFrequencyInfo *mbfi,
                                                 const RegSet &vregs) {
 
-  OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, loopInfo, vregs));
+  OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs));
   PBQP::Graph &g = p->getGraph();
 
   const TargetMachine &tm = mf->getTarget();
@@ -350,7 +349,7 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
 
       PBQP::PBQPNum cBenefit =
         copyFactor * LiveIntervals::getSpillWeight(false, true,
-                                                   loopInfo->getLoopDepth(mbb));
+                                                   mbfi->getBlockFreq(mbb));
 
       if (cp.isPhys()) {
         if (!mf->getRegInfo().isAllocatable(dst)) {
@@ -364,16 +363,16 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
         }
         if (pregOpt < allowed.size()) {
           ++pregOpt; // +1 to account for spill option.
-          PBQP::Graph::NodeItr node = p->getNodeForVReg(src);
+          PBQP::Graph::NodeId node = p->getNodeForVReg(src);
           addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
         }
       } else {
         const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
         const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
 
-        PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst);
-        PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src);
-        PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2);
-        if (edge == g.edgesEnd()) {
+        PBQP::Graph::NodeId node1 = p->getNodeForVReg(dst);
+        PBQP::Graph::NodeId node2 = p->getNodeForVReg(src);
+        PBQP::Graph::EdgeId edge = g.findEdge(node1, node2);
+        if (edge == g.invalidEdgeId()) {
           edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
                                                       allowed2->size() + 1,
                                                       0));
@@ -432,13 +431,14 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
   //au.addRequiredID(SplitCriticalEdgesID);
   if (customPassID)
     au.addRequiredID(*customPassID);
-  au.addRequired<CalculateSpillWeights>();
   au.addRequired<LiveStacks>();
   au.addPreserved<LiveStacks>();
-  au.addRequired<MachineDominatorTree>();
-  au.addPreserved<MachineDominatorTree>();
+  au.addRequired<MachineBlockFrequencyInfo>();
+  au.addPreserved<MachineBlockFrequencyInfo>();
   au.addRequired<MachineLoopInfo>();
   au.addPreserved<MachineLoopInfo>();
+  au.addRequired<MachineDominatorTree>();
+  au.addPreserved<MachineDominatorTree>();
   au.addRequired<VirtRegMap>();
   au.addPreserved<VirtRegMap>();
   MachineFunctionPass::getAnalysisUsage(au);
@@ -475,11 +475,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
   const PBQP::Graph &g = problem.getGraph();
   // Iterate over the nodes mapping the PBQP solution to a register
   // assignment.
-  for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(),
-                                 nodeEnd = g.nodesEnd();
-       node != nodeEnd; ++node) {
-    unsigned vreg = problem.getVRegForNode(node);
-    unsigned alloc = solution.getSelection(node);
+  for (PBQP::Graph::NodeItr nodeItr = g.nodesBegin(),
+                            nodeEnd = g.nodesEnd();
+       nodeItr != nodeEnd; ++nodeItr) {
+    unsigned vreg = problem.getVRegForNode(*nodeItr);
+    unsigned alloc = solution.getSelection(*nodeItr);
 
     if (problem.isPRegOption(vreg, alloc)) {
       unsigned preg = problem.getPRegForOption(vreg, alloc);
@@ -489,7 +489,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
       vrm->assignVirt2Phys(vreg, preg);
     } else if (problem.isSpillOption(vreg, alloc)) {
       vregsToAlloc.erase(vreg);
-      SmallVector<LiveInterval*, 8> newSpills;
+      SmallVector<unsigned, 8> newSpills;
       LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
       spiller->spill(LRE);
 
@@ -500,9 +500,10 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
       // allocate.
       for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end();
            itr != end; ++itr) {
-        assert(!(*itr)->empty() && "Empty spill range.");
-        DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " ");
-        vregsToAlloc.insert((*itr)->reg);
+        LiveInterval &li = lis->getInterval(*itr);
+        assert(!li.empty() && "Empty spill range.");
+        DEBUG(dbgs() << PrintReg(li.reg, tri) << " ");
+        vregsToAlloc.insert(li.reg);
       }
 
       DEBUG(dbgs() << ")\n");
@@ -546,7 +547,10 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
 
   lis = &getAnalysis<LiveIntervals>();
   lss = &getAnalysis<LiveStacks>();
-  loopInfo = &getAnalysis<MachineLoopInfo>();
+  mbfi = &getAnalysis<MachineBlockFrequencyInfo>();
+
+  calculateSpillWeightsAndHints(*lis, MF, getAnalysis<MachineLoopInfo>(),
+                                *mbfi);
 
   vrm = &getAnalysis<VirtRegMap>();
   spiller.reset(createInlineSpiller(*this, MF, *vrm));
@@ -584,7 +588,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
       DEBUG(dbgs() << "  PBQP Regalloc round " << round << ":\n");
 
       OwningPtr<PBQPRAProblem> problem(
-        builder->build(mf, lis, loopInfo, vregsToAlloc));
+        builder->build(mf, lis, mbfi, vregsToAlloc));
 
 #ifndef NDEBUG
       if (pbqpDumpGraphs) {
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 87382d8..cacd7de 100644
--- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -40,6 +40,9 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
   if (MF->getTarget().getRegisterInfo() != TRI) {
     TRI = MF->getTarget().getRegisterInfo();
     RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
+    unsigned NumPSets = TRI->getNumRegPressureSets();
+    PSetLimits.reset(new unsigned[NumPSets]);
+    std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0);
     Update = true;
   }
 
@@ -144,3 +147,32 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
   RCI.Tag = Tag;
 }
 
+/// This is not accurate because two overlapping register sets may have some
+/// nonoverlapping reserved registers. However, computing the allocation order
+/// for all register classes would be too expensive.
+unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
+  const TargetRegisterClass *RC = 0;
+  unsigned NumRCUnits = 0;
+  for (TargetRegisterInfo::regclass_iterator
+         RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) {
+    const int *PSetID = TRI->getRegClassPressureSets(*RI);
+    for (; *PSetID != -1; ++PSetID) {
+      if ((unsigned)*PSetID == Idx)
+        break;
+    }
+    if (*PSetID == -1)
+      continue;
+
+    // Found a register class that counts against this pressure set.
+    // For efficiency, only compute the set order for the largest set.
+    unsigned NUnits = TRI->getRegClassWeight(*RI).WeightLimit;
+    if (!RC || NUnits > NumRCUnits) {
+      RC = *RI;
+      NumRCUnits = NUnits;
+    }
+  }
+  compute(RC);
+  unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC);
+  return TRI->getRegPressureSetLimit(Idx)
+    - TRI->getRegClassWeight(RC).RegWeight * NReserved;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index d85646d..dd86c1f 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -166,7 +166,8 @@ namespace {
 
     /// reMaterializeTrivialDef - If the source of a copy is defined by a
    /// trivial computation, replace the copy by rematerializing the definition.
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index d85646d..dd86c1f 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -166,7 +166,8 @@ namespace {
     /// reMaterializeTrivialDef - If the source of a copy is defined by a
     /// trivial computation, replace the copy by rematerializing the definition.
-    bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI);
+    bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI,
+                                 bool &IsDefCopy);

     /// canJoinPhys - Return true if a physreg copy should be joined.
     bool canJoinPhys(const CoalescerPair &CP);
@@ -397,7 +398,7 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
 }

 void RegisterCoalescer::eliminateDeadDefs() {
-  SmallVector<LiveInterval*, 8> NewRegs;
+  SmallVector<unsigned, 8> NewRegs;
   LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs);
 }
@@ -433,11 +434,11 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
     LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
   SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();

-  // BValNo is a value number in B that is defined by a copy from A. 'B3' in
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
   // the example above.
-  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
-  if (BLR == IntB.end()) return false;
-  VNInfo *BValNo = BLR->valno;
+  LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
+  if (BS == IntB.end()) return false;
+  VNInfo *BValNo = BS->valno;

   // Get the location that B is defined at. Two options: either this value has
   // an unknown definition point or it is defined at CopyIdx. If unknown, we
@@ -446,10 +447,10 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,

   // AValNo is the value number in A that defines the copy, A3 in the example.
   SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
-  LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
-  // The live range might not exist after fun with physreg coalescing.
-  if (ALR == IntA.end()) return false;
-  VNInfo *AValNo = ALR->valno;
+  LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
+  // The live segment might not exist after fun with physreg coalescing.
+  if (AS == IntA.end()) return false;
+  VNInfo *AValNo = AS->valno;

   // If AValNo is defined as a copy from IntB, we can potentially process this.
   // Get the instruction that defines this value number.
@@ -458,54 +459,54 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
   if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
     return false;

-  // Get the LiveRange in IntB that this value number starts with.
-  LiveInterval::iterator ValLR =
-    IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
-  if (ValLR == IntB.end())
+  // Get the Segment in IntB that this value number starts with.
+  LiveInterval::iterator ValS =
+    IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
+  if (ValS == IntB.end())
     return false;

-  // Make sure that the end of the live range is inside the same block as
+  // Make sure that the end of the live segment is inside the same block as
   // CopyMI.
-  MachineInstr *ValLREndInst =
-    LIS->getInstructionFromIndex(ValLR->end.getPrevSlot());
-  if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
+  MachineInstr *ValSEndInst =
+    LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
+  if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
     return false;

-  // Okay, we now know that ValLR ends in the same block that the CopyMI
-  // live-range starts. If there are no intervening live ranges between them in
-  // IntB, we can merge them.
-  if (ValLR+1 != BLR) return false;
+  // Okay, we now know that ValS ends in the same block that the CopyMI
+  // live-range starts. If there are no intervening live segments between them
+  // in IntB, we can merge them.
+  if (ValS+1 != BS) return false;

   DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));

-  SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
+  SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
   // We are about to delete CopyMI, so need to remove it as the 'instruction
   // that defines this value #'. Update the valnum with the new defining
   // instruction #.
   BValNo->def = FillerStart;

   // Okay, we can merge them. We need to insert a new liverange:
-  // [ValLR.end, BLR.begin) of either value number, then we merge the
+  // [ValS.end, BS.begin) of either value number, then we merge the
   // two value numbers.
-  IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+  IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));

   // Okay, merge "B1" into the same value number as "B0".
-  if (BValNo != ValLR->valno)
-    IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+  if (BValNo != ValS->valno)
+    IntB.MergeValueNumberInto(BValNo, ValS->valno);
   DEBUG(dbgs() << "   result = " << IntB << '\n');

   // If the source instruction was killing the source register before the
   // merge, unset the isKill marker given the live range has been extended.
-  int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+  int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
   if (UIdx != -1) {
-    ValLREndInst->getOperand(UIdx).setIsKill(false);
+    ValSEndInst->getOperand(UIdx).setIsKill(false);
   }

   // Rewrite the copy. If the copy instruction was killing the destination
   // register before the merge, find the last use and trim the live range. That
   // will also add the isKill marker.
   CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
-  if (ALR->end == CopyIdx)
+  if (AS->end == CopyIdx)
     LIS->shrinkToUses(&IntA);

   ++numExtends;
@@ -526,11 +527,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
   for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
        AI != AE; ++AI) {
     if (AI->valno != AValNo) continue;
-    LiveInterval::Ranges::iterator BI =
-      std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
-    if (BI != IntB.ranges.begin())
+    LiveInterval::iterator BI =
+      std::upper_bound(IntB.begin(), IntB.end(), AI->start);
+    if (BI != IntB.begin())
       --BI;
-    for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+    for (; BI != IntB.end() && AI->end >= BI->start; ++BI) {
       if (BI->valno == BValNo)
         continue;
       if (BI->start <= AI->start && BI->end > AI->start)
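The adjustCopiesBackFrom hunks above are mostly a rename (live "ranges" become "segments"), but the transformation they implement is easy to lose in diff form: when IntB's value dies immediately before the copy that re-defines it, the two adjacent segments are fused by inserting the filler [ValS.end, BS.start) and merging the value numbers. A toy model of that one step, using an illustrative Seg type rather than LLVM's LiveInterval:

    #include <vector>

    struct Seg { unsigned Start, End, ValNo; };

    // Fuse B's segment at ValS with the copy-defined segment right after
    // it. The caller has already checked ValS + 1 == BS, i.e. that no
    // other segment intervenes.
    static void fuseAcrossCopy(std::vector<Seg> &B, size_t ValS) {
      size_t BS = ValS + 1;
      B[ValS].End = B[BS].End;    // filler [ValS.end, BS.start) plus BS
      B.erase(B.begin() + BS);    // one segment, one value number remain
    }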
@@ -576,14 +577,12 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
   LiveInterval &IntB =
     LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());

-  // BValNo is a value number in B that is defined by a copy from A. 'B3' in
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
   // the example above.
   VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
   if (!BValNo || BValNo->def != CopyIdx)
     return false;

-  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
-
   // AValNo is the value number in A that defines the copy, A3 in the example.
   VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
   assert(AValNo && "COPY source not live");
@@ -613,7 +612,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
   MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
   unsigned NewReg = NewDstMO.getReg();
-  if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill())
+  if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
     return false;

   // Make sure there are no other definitions of IntB that would reach the
@@ -628,8 +627,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
        UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
     MachineInstr *UseMI = &*UI;
     SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
-    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
-    if (ULR == IntA.end() || ULR->valno != AValNo)
+    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
+    if (US == IntA.end() || US->valno != AValNo)
       continue;
     // If this use is tied to a def, we can't rewrite the register.
     if (UseMI->isRegTiedToDefOperand(UI.getOperandNo()))
@@ -680,8 +679,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
       continue;
     }
     SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
-    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
-    if (ULR == IntA.end() || ULR->valno != AValNo)
+    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
+    if (US == IntA.end() || US->valno != AValNo)
       continue;
     // Kill flags are no longer accurate. They are recomputed after RA.
     UseMO.setIsKill(false);
@@ -711,14 +710,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
     UseMI->eraseFromParent();
   }

-  // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+  // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
   // is updated.
   VNInfo *ValNo = BValNo;
   ValNo->def = AValNo->def;
   for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
        AI != AE; ++AI) {
     if (AI->valno != AValNo) continue;
-    IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
+    IntB.addSegment(LiveInterval::Segment(AI->start, AI->end, ValNo));
   }
   DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
@@ -731,23 +730,29 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
 /// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
 /// computation, replace the copy by rematerializing the definition.
 bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
-                                                MachineInstr *CopyMI) {
+                                                MachineInstr *CopyMI,
+                                                bool &IsDefCopy) {
+  IsDefCopy = false;
   unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+  unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
   unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+  unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
   if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
     return false;

   LiveInterval &SrcInt = LIS->getInterval(SrcReg);
-  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
-  LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
-  assert(SrcLR != SrcInt.end() && "Live range not found!");
-  VNInfo *ValNo = SrcLR->valno;
+  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI);
+  VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
+  assert(ValNo && "CopyMI input register not live");
   if (ValNo->isPHIDef() || ValNo->isUnused())
     return false;
   MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
   if (!DefMI)
     return false;
-  assert(DefMI && "Defining instruction disappeared");
+  if (DefMI->isCopyLike()) {
+    IsDefCopy = true;
+    return false;
+  }
   if (!DefMI->isAsCheapAsAMove())
     return false;
   if (!TII->isTriviallyReMaterializable(DefMI, AA))
@@ -760,31 +765,41 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
     return false;
   // Only support subregister destinations when the def is read-undef.
   MachineOperand &DstOperand = CopyMI->getOperand(0);
+  unsigned CopyDstReg = DstOperand.getReg();
   if (DstOperand.getSubReg() && !DstOperand.isUndef())
     return false;
+
+  const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
   if (!DefMI->isImplicitDef()) {
-    // Make sure the copy destination register class fits the instruction
-    // definition register class. The mismatch can happen as a result of earlier
-    // extract_subreg, insert_subreg, subreg_to_reg coalescing.
-    const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF);
-    if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
-      if (!MRI->constrainRegClass(DstReg, RC))
+    if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+      unsigned NewDstReg = DstReg;
+
+      unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
+                                              DefMI->getOperand(0).getSubReg());
+      if (NewDstIdx)
+        NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
+
+      // Finally, make sure that the physical subregister that will be
+      // constructed later is permitted for the instruction.
+      if (!DefRC->contains(NewDstReg))
         return false;
-    } else if (!RC->contains(DstReg))
-      return false;
+    } else {
+      // Theoretically, some stack frame reference could exist. Just make sure
+      // it hasn't actually happened.
+      assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+             "Only expect to deal with virtual or physical registers");
+    }
   }

   MachineBasicBlock *MBB = CopyMI->getParent();
   MachineBasicBlock::iterator MII =
     llvm::next(MachineBasicBlock::iterator(CopyMI));
-  TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
+  TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI);
   MachineInstr *NewMI = prior(MII);

-  // The original DefMI may have been a subregister def, but the full register
-  // class of its destination matches the destination of CopyMI, and CopyMI is
-  // either a full register def or is read-undef. Therefore we can clear the
-  // subregister index on the rematerialized instruction.
-  NewMI->getOperand(0).setSubReg(0);
+  LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+  CopyMI->eraseFromParent();
+  ErasedInstrs.insert(CopyMI);

   // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
   // We need to remember these so we can add intervals once we insert
@@ -800,6 +815,47 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
     }
   }

+  if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+    unsigned NewIdx = NewMI->getOperand(0).getSubReg();
+    const TargetRegisterClass *RCForInst;
+    if (NewIdx)
+      RCForInst = TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), DefRC,
+                                                NewIdx);
+
+    if (MRI->constrainRegClass(DstReg, DefRC)) {
+      // The materialized instruction is quite capable of setting DstReg
+      // directly, but it may still have a now-trivial subregister index which
+      // we should clear.
+      NewMI->getOperand(0).setSubReg(0);
+    } else if (NewIdx && RCForInst) {
+      // The subreg index on NewMI is essential; we still have to make sure
+      // DstReg:idx is in a class that NewMI can use.
+      MRI->constrainRegClass(DstReg, RCForInst);
+    } else {
+      // DstReg is actually incompatible with NewMI, we have to move to a
+      // super-reg's class. This could come from a sequence like:
+      //     GR32 = MOV32r0
+      //     GR8 = COPY GR32:sub_8
+      MRI->setRegClass(DstReg, CP.getNewRC());
+      updateRegDefsUses(DstReg, DstReg, DstIdx);
+      NewMI->getOperand(0).setSubReg(
+        TRI->composeSubRegIndices(SrcIdx, DefMI->getOperand(0).getSubReg()));
+    }
+  } else if (NewMI->getOperand(0).getReg() != CopyDstReg) {
+    // The New instruction may be defining a sub-register of what's actually
+    // been asked for. If so it must implicitly define the whole thing.
+    assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+           "Only expect virtual or physical registers in remat");
+    NewMI->getOperand(0).setIsDead(true);
+    NewMI->addOperand(MachineOperand::CreateReg(CopyDstReg,
+                                                true  /*IsDef*/,
+                                                true  /*IsImp*/,
+                                                false /*IsKill*/));
+  }
+
+  if (NewMI->getOperand(0).getSubReg())
+    NewMI->getOperand(0).setIsUndef();
+
   // CopyMI may have implicit operands, transfer them over to the newly
   // rematerialized instruction. And update implicit def interval valnos.
   for (unsigned i = CopyMI->getDesc().getNumOperands(),
@@ -814,18 +870,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
     }
   }

-  LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
-
   SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
   for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
     unsigned Reg = NewMIImplDefs[i];
     for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
-      if (LiveInterval *LI = LIS->getCachedRegUnit(*Units))
-        LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
+      if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
+        LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
   }

-  CopyMI->eraseFromParent();
-  ErasedInstrs.insert(CopyMI);
   DEBUG(dbgs() << "Remat: " << *NewMI);
   ++NumReMats;
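The block added by the hunk above chooses, for a virtual destination register, among three outcomes: constrain DstReg to the instruction's own class and drop the now-trivial subregister index, keep the index but tighten the class, or widen DstReg to a super-register class as in the GR32 = MOV32r0 / GR8 = COPY GR32:sub_8 sequence its comment cites. A condensed sketch of that decision ladder follows; the booleans stand in for the constrainRegClass and getMatchingSuperRegClass queries and are assumptions made for illustration only.

    enum RematFixup { ClearSubReg, KeepSubRegConstrained, WidenToSuperClass };

    static RematFixup classifyRematDst(bool FitsDefRC, bool HasSubIdx,
                                       bool FitsSubRegClass) {
      if (FitsDefRC)
        return ClearSubReg;            // DstReg holds the full remat value
      if (HasSubIdx && FitsSubRegClass)
        return KeepSubRegConstrained;  // keep DstReg:idx, tighten the class
      return WidenToSuperClass;        // move DstReg to the coalesced class
    }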
@@ -994,7 +1046,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
   if (CP.getSrcReg() == CP.getDstReg()) {
     LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
     DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
-    LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI));
+    LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(CopyMI));
     if (VNInfo *DefVNI = LRQ.valueDefined()) {
       VNInfo *ReadVNI = LRQ.valueIn();
       assert(ReadVNI && "No value before copy and no <undef> flag.");
@@ -1015,8 +1067,11 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
     if (!canJoinPhys(CP)) {
       // Before giving up coalescing, if definition of source is defined by
       // trivial computation, try rematerializing it.
-      if (reMaterializeTrivialDef(CP, CopyMI))
+      bool IsDefCopy;
+      if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
         return true;
+      if (IsDefCopy)
+        Again = true;  // May be possible to coalesce later.
       return false;
     }
   } else {
@@ -1034,8 +1089,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
     });

     // When possible, let DstReg be the larger interval.
-    if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
-                           LIS->getInterval(CP.getDstReg()).ranges.size())
+    if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
+                           LIS->getInterval(CP.getDstReg()).size())
       CP.flip();
   }
@@ -1048,10 +1103,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {

     // If definition of source is defined by trivial computation, try
     // rematerializing it.
-    if (reMaterializeTrivialDef(CP, CopyMI))
+    bool IsDefCopy;
+    if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
       return true;

-    // If we can eliminate the copy without merging the live ranges, do so now.
+    // If we can eliminate the copy without merging the live segments, do so
+    // now.
     if (!CP.isPartial() && !CP.isPhys()) {
       if (adjustCopiesBackFrom(CP, CopyMI) ||
           removeCopyByCommutingDef(CP, CopyMI)) {
@@ -1099,10 +1156,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
   TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);

   DEBUG({
-    dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI);
-    if (!CP.isPhys())
+    dbgs() << "\tJoined. Result = ";
+    if (CP.isPhys())
+      dbgs() << PrintReg(CP.getDstReg(), TRI);
+    else
       dbgs() << LIS->getInterval(CP.getDstReg());
-    dbgs() << '\n';
+    dbgs() << '\n';
   });

   ++numJoins;
@@ -1114,8 +1173,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
   assert(CP.isPhys() && "Must be a physreg copy");
   assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
   LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
-  DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
-               << '\n');
+  DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');

   assert(CP.isFlipped() && RHS.containsOneValue() &&
          "Invalid join with reserved register");
@@ -1384,7 +1442,7 @@ VNInfo *JoinVals::stripCopies(VNInfo *VNI) {
     unsigned Reg = MI->getOperand(1).getReg();
     if (!TargetRegisterInfo::isVirtualRegister(Reg))
       break;
-    LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def);
+    LiveQueryResult LRQ = LIS->getInterval(Reg).Query(VNI->def);
    if (!LRQ.valueIn())
       break;
     VNI = LRQ.valueIn();
@@ -1435,7 +1493,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   // The <read-undef> flag on the def operand means that old lane values are
   // not important.
   if (Redef) {
-    V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn();
+    V.RedefVNI = LI.Query(VNI->def).valueIn();
     assert(V.RedefVNI && "Instruction is reading nonexistent value");
     computeAssignment(V.RedefVNI->id, Other);
     V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
@@ -1452,7 +1510,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   }

   // Find the value in Other that overlaps VNI->def, if any.
-  LiveRangeQuery OtherLRQ(Other.LI, VNI->def);
+  LiveQueryResult OtherLRQ = Other.LI.Query(VNI->def);

   // It is possible that both values are defined by the same instruction, or
   // the values are PHIs defined in the same block. When that happens, the two
@@ -1911,8 +1969,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
   JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI);
   JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI);

-  DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
-               << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS
+  DEBUG(dbgs() << "\t\tRHS = " << RHS
+               << "\n\t\tLHS = " << LHS
                << '\n');

   // First compute NewVNInfo and the simple value mappings.
@@ -1943,8 +2001,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
     LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));

   // Join RHS into LHS.
-  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo,
-           MRI);
+  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);

   // Kill flags are going to be wrong if the live ranges were overlapping.
   // Eventually, we should simply clear all kill flags when computing live
@@ -1959,7 +2016,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
     // CR_Replace conflicts.
     DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
                  << " points: " << LHS << '\n');
-    LIS->extendToIndices(&LHS, EndPoints);
+    LIS->extendToIndices(LHS, EndPoints);
   }

   return true;
@@ -1985,9 +2042,8 @@ struct MBBPriorityInfo {
 // block (the unsigned), and then on the MBB number.
 //
 // EnableGlobalCopies assumes that the primary sort key is loop depth.
-static int compareMBBPriority(const void *L, const void *R) {
-  const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
-  const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
+static int compareMBBPriority(const MBBPriorityInfo *LHS,
+                              const MBBPriorityInfo *RHS) {
   // Deeper loops first
   if (LHS->Depth != RHS->Depth)
     return LHS->Depth > RHS->Depth ? -1 : 1;
@@ -2012,6 +2068,9 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
   if (!Copy->isCopy())
     return false;

+  if (Copy->getOperand(1).isUndef())
+    return false;
+
   unsigned SrcReg = Copy->getOperand(1).getReg();
   unsigned DstReg = Copy->getOperand(0).getReg();
   if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
@@ -2057,8 +2116,8 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
     // are not inherently easier to resolve, but slightly preferable until we
     // have local live range splitting. In particular this is required by
     // cmp+jmp macro fusion.
-    for (MachineBasicBlock::reverse_iterator
-           MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) {
+    for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+         MII != E; ++MII) {
       if (!MII->isCopyLike())
         continue;
       if (isLocalCopy(&(*MII), LIS))
@@ -2142,7 +2201,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {

   const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
   if (EnableGlobalCopies == cl::BOU_UNSET)
-    JoinGlobalCopies = ST.enableMachineScheduler();
+    JoinGlobalCopies = ST.useMachineScheduler();
   else
     JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index 97f22e1..092ecdd 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -25,68 +25,39 @@ using namespace llvm;

 /// Increase pressure for each pressure set provided by TargetRegisterInfo.
 static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
-                                std::vector<unsigned> &MaxSetPressure,
-                                const int *PSet, unsigned Weight) {
-  for (; *PSet != -1; ++PSet) {
-    CurrSetPressure[*PSet] += Weight;
-    if (&CurrSetPressure != &MaxSetPressure
-        && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) {
-      MaxSetPressure[*PSet] = CurrSetPressure[*PSet];
-    }
-  }
+                                PSetIterator PSetI) {
+  unsigned Weight = PSetI.getWeight();
+  for (; PSetI.isValid(); ++PSetI)
+    CurrSetPressure[*PSetI] += Weight;
 }

 /// Decrease pressure for each pressure set provided by TargetRegisterInfo.
 static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
-                                const int *PSet, unsigned Weight) {
-  for (; *PSet != -1; ++PSet) {
-    assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow");
-    CurrSetPressure[*PSet] -= Weight;
-  }
-}
-
-/// Directly increase pressure only within this RegisterPressure result.
-void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI,
-                                const MachineRegisterInfo *MRI) {
-  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
-    increaseSetPressure(MaxSetPressure, MaxSetPressure,
-                        TRI->getRegClassPressureSets(RC),
-                        TRI->getRegClassWeight(RC).RegWeight);
-  }
-  else {
-    increaseSetPressure(MaxSetPressure, MaxSetPressure,
-                        TRI->getRegUnitPressureSets(Reg),
-                        TRI->getRegUnitWeight(Reg));
-  }
-}
-
-/// Directly decrease pressure only within this RegisterPressure result.
-void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI,
-                                const MachineRegisterInfo *MRI) {
-  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
-    decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC),
-                        TRI->getRegClassWeight(RC).RegWeight);
-  }
-  else {
-    decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg),
-                        TRI->getRegUnitWeight(Reg));
+                                PSetIterator PSetI) {
+  unsigned Weight = PSetI.getWeight();
+  for (; PSetI.isValid(); ++PSetI) {
+    assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow");
+    CurrSetPressure[*PSetI] -= Weight;
   }
 }

 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-static void dumpSetPressure(const std::vector<unsigned> &SetPressure,
-                            const TargetRegisterInfo *TRI) {
+void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
+                              const TargetRegisterInfo *TRI) {
+  bool Empty = true;
   for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) {
-    if (SetPressure[i] != 0)
+    if (SetPressure[i] != 0) {
       dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n';
+      Empty = false;
+    }
   }
+  if (Empty)
+    dbgs() << "\n";
 }

 void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
   dbgs() << "Max Pressure: ";
-  dumpSetPressure(MaxSetPressure, TRI);
+  dumpRegSetPressure(MaxSetPressure, TRI);
   dbgs() << "Live In: ";
   for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
     dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
@@ -98,44 +69,33 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
 }

 void RegPressureTracker::dump() const {
-  dbgs() << "Curr Pressure: ";
-  dumpSetPressure(CurrSetPressure, TRI);
+  if (!isTopClosed() || !isBottomClosed()) {
+    dbgs() << "Curr Pressure: ";
+    dumpRegSetPressure(CurrSetPressure, TRI);
+  }
   P.dump(TRI);
 }
 #endif
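The PSetIterator refactor above folds the old virtual/physical split behind a single query that carries the register's weight along with its pressure-set list. A minimal sketch of the resulting update loop, using a toy iterator over a -1 terminated table; the table layout mirrors, but is not, the TableGen-emitted one:

    #include <vector>

    struct ToyPSetIter {
      const int *PSet;     // -1 terminated list of pressure-set IDs
      unsigned Weight;     // weight of the register in every listed set
      bool isValid() const { return *PSet != -1; }
      void operator++() { ++PSet; }
      unsigned operator*() const { return (unsigned)*PSet; }
    };

    static void increasePressure(std::vector<unsigned> &Curr, ToyPSetIter I) {
      for (; I.isValid(); ++I)
        Curr[*I] += I.Weight;  // every set the register feeds grows by Weight
    }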
 /// Increase the current pressure as impacted by these registers and bump
 /// the high water mark if needed.
-void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> Regs) {
-  for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
-    if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
-      const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
-      increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
-                          TRI->getRegClassPressureSets(RC),
-                          TRI->getRegClassWeight(RC).RegWeight);
-    }
-    else {
-      increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
-                          TRI->getRegUnitPressureSets(Regs[I]),
-                          TRI->getRegUnitWeight(Regs[I]));
+void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
+  for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+    PSetIterator PSetI = MRI->getPressureSets(RegUnits[i]);
+    unsigned Weight = PSetI.getWeight();
+    for (; PSetI.isValid(); ++PSetI) {
+      CurrSetPressure[*PSetI] += Weight;
+      if (CurrSetPressure[*PSetI] > P.MaxSetPressure[*PSetI]) {
+        P.MaxSetPressure[*PSetI] = CurrSetPressure[*PSetI];
+      }
     }
   }
 }

 /// Simply decrease the current pressure as impacted by these registers.
-void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> Regs) {
-  for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
-    if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
-      const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
-      decreaseSetPressure(CurrSetPressure,
-                          TRI->getRegClassPressureSets(RC),
-                          TRI->getRegClassWeight(RC).RegWeight);
-    }
-    else {
-      decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]),
-                          TRI->getRegUnitWeight(Regs[I]));
-    }
-  }
+void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> RegUnits) {
+  for (unsigned I = 0, E = RegUnits.size(); I != E; ++I)
+    decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnits[I]));
 }

 /// Clear the result so it can be used for another round of pressure tracking.
@@ -187,12 +147,30 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
   LiveInRegs.clear();
 }

-const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const {
+const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const {
   if (TargetRegisterInfo::isVirtualRegister(Reg))
     return &LIS->getInterval(Reg);
   return LIS->getCachedRegUnit(Reg);
 }

+void RegPressureTracker::reset() {
+  MBB = 0;
+  LIS = 0;
+
+  CurrSetPressure.clear();
+  LiveThruPressure.clear();
+  P.MaxSetPressure.clear();
+
+  if (RequireIntervals)
+    static_cast<IntervalPressure&>(P).reset();
+  else
+    static_cast<RegionPressure&>(P).reset();
+
+  LiveRegs.PhysRegs.clear();
+  LiveRegs.VirtRegs.clear();
+  UntiedDefs.clear();
+}
+
 /// Setup the RegPressureTracker.
 ///
 /// TODO: Add support for pressure without LiveIntervals.
@@ -200,13 +178,17 @@ void RegPressureTracker::init(const MachineFunction *mf,
                               const RegisterClassInfo *rci,
                               const LiveIntervals *lis,
                               const MachineBasicBlock *mbb,
-                              MachineBasicBlock::const_iterator pos)
+                              MachineBasicBlock::const_iterator pos,
+                              bool ShouldTrackUntiedDefs)
 {
+  reset();
+
   MF = mf;
   TRI = MF->getTarget().getRegisterInfo();
   RCI = rci;
   MRI = &MF->getRegInfo();
   MBB = mbb;
+  TrackUntiedDefs = ShouldTrackUntiedDefs;

   if (RequireIntervals) {
     assert(lis && "IntervalPressure requires LiveIntervals");
@@ -216,16 +198,12 @@ void RegPressureTracker::init(const MachineFunction *mf,
   CurrPos = pos;
   CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0);

-  if (RequireIntervals)
-    static_cast<IntervalPressure&>(P).reset();
-  else
-    static_cast<RegionPressure&>(P).reset();
   P.MaxSetPressure = CurrSetPressure;

-  LiveRegs.PhysRegs.clear();
   LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs());
-  LiveRegs.VirtRegs.clear();
   LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs());
+  if (TrackUntiedDefs)
+    UntiedDefs.setUniverse(MRI->getNumVirtRegs());
 }

 /// Does this pressure result have a valid top position and live ins.
@@ -304,16 +282,36 @@ void RegPressureTracker::closeRegion() {
   // If both top and bottom are closed, do nothing.
 }

+/// The register tracker is unaware of global liveness so ignores normal
+/// live-thru ranges. However, two-address or coalesced chains can also lead
+/// to live ranges with no holes. Count these to inform heuristics that we
+/// can never drop below this pressure.
+void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
+  LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0);
+  assert(isBottomClosed() && "need bottom-up tracking to initialize.");
+  for (unsigned i = 0, e = P.LiveOutRegs.size(); i < e; ++i) {
+    unsigned Reg = P.LiveOutRegs[i];
+    if (TargetRegisterInfo::isVirtualRegister(Reg)
+        && !RPTracker.hasUntiedDef(Reg)) {
+      increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg));
+    }
+  }
+}
+
 /// \brief Convenient wrapper for checking membership in RegisterOperands.
-static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) {
-  return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end();
+/// (std::count() doesn't have an early exit).
+static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) {
+  return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end();
 }

 /// Collect this instruction's unique uses and defs into SmallVectors for
 /// processing defs and uses in order.
+///
+/// FIXME: always ignore tied opers
 class RegisterOperands {
   const TargetRegisterInfo *TRI;
   const MachineRegisterInfo *MRI;
+  bool IgnoreDead;

 public:
   SmallVector<unsigned, 8> Uses;
@@ -321,7 +319,8 @@ public:
   SmallVector<unsigned, 8> DeadDefs;

   RegisterOperands(const TargetRegisterInfo *tri,
-                   const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {}
+                   const MachineRegisterInfo *mri, bool ID = false):
+    TRI(tri), MRI(mri), IgnoreDead(ID) {}

   /// Push this operand's register onto the correct vector.
   void collect(const MachineOperand &MO) {
     if (!MO.isReg() || !MO.getReg())
       return;
     if (MO.readsReg())
       pushRegUnits(MO.getReg(), Uses);
     if (MO.isDef()) {
-      if (MO.isDead())
-        pushRegUnits(MO.getReg(), DeadDefs);
+      if (MO.isDead()) {
+        if (!IgnoreDead)
+          pushRegUnits(MO.getReg(), DeadDefs);
+      }
       else
         pushRegUnits(MO.getReg(), Defs);
     }
   }

 protected:
-  void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &Regs) {
+  void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) {
     if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-      if (containsReg(Regs, Reg))
+      if (containsReg(RegUnits, Reg))
         return;
-      Regs.push_back(Reg);
+      RegUnits.push_back(Reg);
     }
     else if (MRI->isAllocatable(Reg)) {
       for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
-        if (containsReg(Regs, *Units))
+        if (containsReg(RegUnits, *Units))
           continue;
-        Regs.push_back(*Units);
+        RegUnits.push_back(*Units);
       }
     }
   }
@@ -367,6 +368,56 @@ static void collectOperands(const MachineInstr *MI,
   RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
 }

+/// Initialize an array of N PressureDiffs.
+void PressureDiffs::init(unsigned N) {
+  Size = N;
+  if (N <= Max) {
+    memset(PDiffArray, 0, N * sizeof(PressureDiff));
+    return;
+  }
+  Max = Size;
+  free(PDiffArray);
+  PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff)));
+}
+
+/// Add a change in pressure to the pressure diff of a given instruction.
+void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
+                                     const MachineRegisterInfo *MRI) {
+  PSetIterator PSetI = MRI->getPressureSets(RegUnit);
+  int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
+  for (; PSetI.isValid(); ++PSetI) {
+    // Find an existing entry in the pressure diff for this PSet.
+    PressureDiff::iterator I = begin(), E = end();
+    for (; I != E && I->isValid(); ++I) {
+      if (I->getPSet() >= *PSetI)
+        break;
+    }
+    // If all pressure sets are more constrained, skip the remaining PSets.
+    if (I == E)
+      break;
+    // Insert this PressureChange.
+    if (!I->isValid() || I->getPSet() != *PSetI) {
+      PressureChange PTmp = PressureChange(*PSetI);
+      for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J)
+        std::swap(*J,PTmp);
+    }
+    // Update the units for this pressure set.
+    I->setUnitInc(I->getUnitInc() + Weight);
+  }
+}
+
+/// Record the pressure difference induced by the given operand list.
+static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers,
+                         const MachineRegisterInfo *MRI) {
+  assert(!PDiff.begin()->isValid() && "stale PDiff");
+
+  for (unsigned i = 0, e = RegOpers.Defs.size(); i != e; ++i)
+    PDiff.addPressureChange(RegOpers.Defs[i], true, MRI);
+
+  for (unsigned i = 0, e = RegOpers.Uses.size(); i != e; ++i)
+    PDiff.addPressureChange(RegOpers.Uses[i], false, MRI);
+}
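addPressureChange keeps each instruction's PressureDiff sorted by pressure-set ID, which is what lets later queries scan the diff and the critical-set list in lockstep. The same insert-or-accumulate step over a plain vector, as a rough sketch; Change is an illustrative stand-in for PressureChange, not the real type:

    #include <vector>

    struct Change { unsigned PSet; int UnitInc; };

    static void addChange(std::vector<Change> &Diff, unsigned PSet, int Inc) {
      std::vector<Change>::iterator I = Diff.begin(), E = Diff.end();
      while (I != E && I->PSet < PSet)
        ++I;                                 // find the sorted position
      if (I != E && I->PSet == PSet) {
        I->UnitInc += Inc;                   // existing entry: accumulate
      } else {
        Change C = { PSet, Inc };
        Diff.insert(I, C);                   // new entry: keep the order
      }
    }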
 /// Force liveness of registers.
 void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
@@ -383,7 +434,7 @@ void RegPressureTracker::discoverLiveIn(unsigned Reg) {
   // At live in discovery, unconditionally increase the high water mark.
   P.LiveInRegs.push_back(Reg);
-  P.increase(Reg, TRI, MRI);
+  increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg));
 }

 /// Add Reg to the live out set and increase max pressure.
@@ -394,11 +445,16 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) {
   // At live out discovery, unconditionally increase the high water mark.
   P.LiveOutRegs.push_back(Reg);
-  P.increase(Reg, TRI, MRI);
+  increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg));
 }

-/// Recede across the previous instruction.
-bool RegPressureTracker::recede() {
+/// Recede across the previous instruction. If LiveUses is provided, record any
+/// RegUnits that are made live by the current instruction's uses. This
+/// includes registers that are both defined and used by the instruction. If a
+/// pressure difference pointer is provided, record the change in pressure
+/// caused by this instruction independent of liveness.
+bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
+                                PressureDiff *PDiff) {
   // Check for the top of the analyzable region.
   if (CurrPos == MBB->begin()) {
     closeRegion();
@@ -431,6 +487,9 @@ bool RegPressureTracker::recede() {
   RegisterOperands RegOpers(TRI, MRI);
   collectOperands(CurrPos, RegOpers);

+  if (PDiff)
+    collectPDiff(*PDiff, RegOpers, MRI);
+
   // Boost pressure for all dead defs together.
   increaseRegPressure(RegOpers.DeadDefs);
   decreaseRegPressure(RegOpers.DeadDefs);
@@ -439,10 +498,20 @@ bool RegPressureTracker::recede() {
   // TODO: consider earlyclobbers?
   for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
     unsigned Reg = RegOpers.Defs[i];
-    if (LiveRegs.erase(Reg))
-      decreaseRegPressure(Reg);
-    else
-      discoverLiveOut(Reg);
+    bool DeadDef = false;
+    if (RequireIntervals) {
+      const LiveRange *LR = getLiveRange(Reg);
+      if (LR) {
+        LiveQueryResult LRQ = LR->Query(SlotIdx);
+        DeadDef = LRQ.isDeadDef();
+      }
+    }
+    if (!DeadDef) {
+      if (LiveRegs.erase(Reg))
+        decreaseRegPressure(Reg);
+      else
+        discoverLiveOut(Reg);
+    }
   }

   // Generate liveness for uses.
@@ -451,12 +520,24 @@ bool RegPressureTracker::recede() {
     if (!LiveRegs.contains(Reg)) {
       // Adjust liveouts if LiveIntervals are available.
       if (RequireIntervals) {
-        const LiveInterval *LI = getInterval(Reg);
-        if (LI && !LI->killedAt(SlotIdx))
-          discoverLiveOut(Reg);
+        const LiveRange *LR = getLiveRange(Reg);
+        if (LR) {
+          LiveQueryResult LRQ = LR->Query(SlotIdx);
+          if (!LRQ.isKill() && !LRQ.valueDefined())
+            discoverLiveOut(Reg);
+        }
       }
       increaseRegPressure(Reg);
       LiveRegs.insert(Reg);
+      if (LiveUses && !containsReg(*LiveUses, Reg))
+        LiveUses->push_back(Reg);
+    }
+  }
+  if (TrackUntiedDefs) {
+    for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+      unsigned Reg = RegOpers.Defs[i];
+      if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg))
+        UntiedDefs.insert(Reg);
     }
   }
   return true;
@@ -464,6 +545,8 @@ bool RegPressureTracker::recede() {

 /// Advance across the current instruction.
 bool RegPressureTracker::advance() {
+  assert(!TrackUntiedDefs && "unsupported mode");
+
   // Check for the bottom of the analyzable region.
   if (CurrPos == MBB->end()) {
     closeRegion();
@@ -496,8 +579,8 @@ bool RegPressureTracker::advance() {
     // Kill liveness at last uses.
     bool lastUse = false;
     if (RequireIntervals) {
-      const LiveInterval *LI = getInterval(Reg);
-      lastUse = LI && LI->killedAt(SlotIdx);
+      const LiveRange *LR = getLiveRange(Reg);
+      lastUse = LR && LR->Query(SlotIdx).isKill();
     }
     else {
       // Allocatable physregs are always single-use before register rewriting.
@@ -533,9 +616,9 @@ bool RegPressureTracker::advance() {
 static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
                                        ArrayRef<unsigned> NewPressureVec,
                                        RegPressureDelta &Delta,
-                                       const TargetRegisterInfo *TRI) {
-  int ExcessUnits = 0;
-  unsigned PSetID = ~0U;
+                                       const RegisterClassInfo *RCI,
+                                       ArrayRef<unsigned> LiveThruPressureVec) {
+  Delta.Excess = PressureChange();
   for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) {
     unsigned POld = OldPressureVec[i];
     unsigned PNew = NewPressureVec[i];
@@ -543,7 +626,10 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
     if (!PDiff) // No change in this set in the common case.
       continue;
     // Only consider change beyond the limit.
-    unsigned Limit = TRI->getRegPressureSetLimit(i);
+    unsigned Limit = RCI->getRegPressureSetLimit(i);
+    if (!LiveThruPressureVec.empty())
+      Limit += LiveThruPressureVec[i];
+
     if (Limit > POld) {
       if (Limit > PNew)
         PDiff = 0;            // Under the limit
@@ -553,13 +639,12 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
     else if (Limit > PNew)
       PDiff = Limit - POld;   // Just obeyed limit.

-    if (std::abs(PDiff) > std::abs(ExcessUnits)) {
-      ExcessUnits = PDiff;
-      PSetID = i;
+    if (PDiff) {
+      Delta.Excess = PressureChange(i);
+      Delta.Excess.setUnitInc(PDiff);
+      break;
     }
   }
-  Delta.Excess.PSetID = PSetID;
-  Delta.Excess.UnitIncrease = ExcessUnits;
 }

 /// Find the max change in max pressure that either surpasses a critical PSet
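Two things changed in computeExcessPressureDelta: the limit is now raised by any live-thru pressure the tracker can never reclaim, and the scan reports the first set that crosses its adjusted limit rather than the largest excess. Tracing the "just obeyed limit" branch with assumed numbers:

    #include <cassert>

    int main() {
      // Assumed: TableGen limit 24 plus 4 units of live-thru pressure.
      unsigned Limit = 24 + 4;
      unsigned POld = 30, PNew = 26;      // the instruction relieves pressure
      int PDiff = (int)PNew - (int)POld;  // raw change: -4
      if (Limit > POld) {
        if (Limit > PNew)
          PDiff = 0;                      // stayed under the limit
      } else if (Limit > PNew) {
        PDiff = (int)Limit - (int)POld;   // just obeyed the limit
      }
      assert(PDiff == -2);  // only the 2 units above the limit count
      return 0;
    }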
@@ -570,11 +655,11 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
 /// RegPressureTracker API change to work with pressure differences.
 static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
                                     ArrayRef<unsigned> NewMaxPressureVec,
-                                    ArrayRef<PressureElement> CriticalPSets,
+                                    ArrayRef<PressureChange> CriticalPSets,
                                     ArrayRef<unsigned> MaxPressureLimit,
                                     RegPressureDelta &Delta) {
-  Delta.CriticalMax = PressureElement();
-  Delta.CurrentMax = PressureElement();
+  Delta.CriticalMax = PressureChange();
+  Delta.CurrentMax = PressureChange();

   unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
   for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) {
@@ -583,23 +668,25 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
     if (PNew == POld) // No change in this set in the common case.
       continue;

-    while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i)
-      ++CritIdx;
+    if (!Delta.CriticalMax.isValid()) {
+      while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < i)
+        ++CritIdx;

-    if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) {
-      int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease;
-      if (PDiff > Delta.CriticalMax.UnitIncrease) {
-        Delta.CriticalMax.PSetID = i;
-        Delta.CriticalMax.UnitIncrease = PDiff;
+      if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == i) {
+        int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].getUnitInc();
+        if (PDiff > 0) {
+          Delta.CriticalMax = PressureChange(i);
+          Delta.CriticalMax.setUnitInc(PDiff);
+        }
       }
     }
-
-    // Find the greatest increase above MaxPressureLimit.
+    // Find the first increase above MaxPressureLimit.
     // (Ignores negative MDiff).
-    int MDiff = (int)PNew - (int)MaxPressureLimit[i];
-    if (MDiff > Delta.CurrentMax.UnitIncrease) {
-      Delta.CurrentMax.PSetID = i;
-      Delta.CurrentMax.UnitIncrease = PNew;
+    if (!Delta.CurrentMax.isValid() && PNew > MaxPressureLimit[i]) {
+      Delta.CurrentMax = PressureChange(i);
+      Delta.CurrentMax.setUnitInc(PNew - POld);
+      if (CritIdx == CritEnd || Delta.CriticalMax.isValid())
+        break;
     }
   }
 }
@@ -614,7 +701,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
   assert(!MI->isDebugValue() && "Expect a nondebug instruction.");

   // Account for register pressure similar to RegPressureTracker::recede().
-  RegisterOperands RegOpers(TRI, MRI);
+  RegisterOperands RegOpers(TRI, MRI, /*IgnoreDead=*/true);
   collectOperands(MI, RegOpers);

   // Boost max pressure for all dead defs together.
@@ -625,8 +712,19 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
   // Kill liveness at live defs.
   for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
     unsigned Reg = RegOpers.Defs[i];
-    if (!containsReg(RegOpers.Uses, Reg))
-      decreaseRegPressure(Reg);
+    bool DeadDef = false;
+    if (RequireIntervals) {
+      const LiveRange *LR = getLiveRange(Reg);
+      if (LR) {
+        SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
+        LiveQueryResult LRQ = LR->Query(SlotIdx);
+        DeadDef = LRQ.isDeadDef();
+      }
+    }
+    if (!DeadDef) {
+      if (!containsReg(RegOpers.Uses, Reg))
+        decreaseRegPressure(Reg);
+    }
   }
   // Generate liveness for uses.
   for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
@@ -648,8 +746,9 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
 /// result per-SUnit with enough information to adjust for the current
 /// scheduling position. But this works as a proof of concept.
 void RegPressureTracker::
-getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
-                          ArrayRef<PressureElement> CriticalPSets,
+getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff,
+                          RegPressureDelta &Delta,
+                          ArrayRef<PressureChange> CriticalPSets,
                           ArrayRef<unsigned> MaxPressureLimit) {
   // Snapshot Pressure.
   // FIXME: The snapshot heap space should persist. But I'm planning to
@@ -659,15 +758,117 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,

   bumpUpwardPressure(MI);

-  computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI);
+  computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI,
+                             LiveThruPressure);
   computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
                           MaxPressureLimit, Delta);
-  assert(Delta.CriticalMax.UnitIncrease >= 0 &&
-         Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+  assert(Delta.CriticalMax.getUnitInc() >= 0 &&
+         Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure");

   // Restore the tracker's state.
   P.MaxSetPressure.swap(SavedMaxPressure);
   CurrSetPressure.swap(SavedPressure);
+
+#ifndef NDEBUG
+  if (!PDiff)
+    return;
+
+  // Check if the alternate algorithm yields the same result.
+  RegPressureDelta Delta2;
+  getUpwardPressureDelta(MI, *PDiff, Delta2, CriticalPSets, MaxPressureLimit);
+  if (Delta != Delta2) {
+    dbgs() << "DELTA: " << *MI;
+    if (Delta.Excess.isValid())
+      dbgs() << "Excess1 " << TRI->getRegPressureSetName(Delta.Excess.getPSet())
+             << " " << Delta.Excess.getUnitInc() << "\n";
+    if (Delta.CriticalMax.isValid())
+      dbgs() << "Critic1 " << TRI->getRegPressureSetName(Delta.CriticalMax.getPSet())
+             << " " << Delta.CriticalMax.getUnitInc() << "\n";
+    if (Delta.CurrentMax.isValid())
+      dbgs() << "CurrMx1 " << TRI->getRegPressureSetName(Delta.CurrentMax.getPSet())
+             << " " << Delta.CurrentMax.getUnitInc() << "\n";
+    if (Delta2.Excess.isValid())
+      dbgs() << "Excess2 " << TRI->getRegPressureSetName(Delta2.Excess.getPSet())
+             << " " << Delta2.Excess.getUnitInc() << "\n";
+    if (Delta2.CriticalMax.isValid())
+      dbgs() << "Critic2 " << TRI->getRegPressureSetName(Delta2.CriticalMax.getPSet())
+             << " " << Delta2.CriticalMax.getUnitInc() << "\n";
+    if (Delta2.CurrentMax.isValid())
+      dbgs() << "CurrMx2 " << TRI->getRegPressureSetName(Delta2.CurrentMax.getPSet())
+             << " " << Delta2.CurrentMax.getUnitInc() << "\n";
+    llvm_unreachable("RegP Delta Mismatch");
+  }
+#endif
+}
+
+/// This is a prototype of the fast version of querying register pressure that
+/// does not directly depend on current liveness. It's still slow because we
+/// recompute pressure change on-the-fly. This implementation only exists to
+/// prove correctness.
+///
+/// @param Delta captures information needed for heuristics.
+///
+/// @param CriticalPSets Are the pressure sets that are known to exceed some
+/// limit within the region, not necessarily at the current position.
+///
+/// @param MaxPressureLimit Is the max pressure within the region, not
+/// necessarily at the current position.
+void RegPressureTracker::
+getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
+                       RegPressureDelta &Delta,
+                       ArrayRef<PressureChange> CriticalPSets,
+                       ArrayRef<unsigned> MaxPressureLimit) const {
+  unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
+  for (PressureDiff::const_iterator
+         PDiffI = PDiff.begin(), PDiffE = PDiff.end();
+       PDiffI != PDiffE && PDiffI->isValid(); ++PDiffI) {
+
+    unsigned PSetID = PDiffI->getPSet();
+    unsigned Limit = RCI->getRegPressureSetLimit(PSetID);
+    if (!LiveThruPressure.empty())
+      Limit += LiveThruPressure[PSetID];
+
+    unsigned POld = CurrSetPressure[PSetID];
+    unsigned MOld = P.MaxSetPressure[PSetID];
+    unsigned MNew = MOld;
+    // Ignore DeadDefs here because they aren't captured by PressureChange.
+    unsigned PNew = POld + PDiffI->getUnitInc();
+    assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) && "PSet overflow");
+    if (PNew > MOld)
+      MNew = PNew;
+    // Check if current pressure has exceeded the limit.
+    if (!Delta.Excess.isValid()) {
+      unsigned ExcessInc = 0;
+      if (PNew > Limit)
+        ExcessInc = POld > Limit ? PNew - POld : PNew - Limit;
+      else if (POld > Limit)
+        ExcessInc = Limit - POld;
+      if (ExcessInc) {
+        Delta.Excess = PressureChange(PSetID);
+        Delta.Excess.setUnitInc(ExcessInc);
+      }
+    }
+    // Check if max pressure has exceeded a critical pressure set max.
+    if (MNew == MOld)
+      continue;
+    if (!Delta.CriticalMax.isValid()) {
+      while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < PSetID)
+        ++CritIdx;
+
+      if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) {
+        int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc();
+        if (CritInc > 0 && CritInc <= INT16_MAX) {
+          Delta.CriticalMax = PressureChange(PSetID);
+          Delta.CriticalMax.setUnitInc(CritInc);
+        }
+      }
+    }
+    // Check if max pressure has exceeded the current max.
+    if (!Delta.CurrentMax.isValid() && MNew > MaxPressureLimit[PSetID]) {
+      Delta.CurrentMax = PressureChange(PSetID);
+      Delta.CurrentMax.setUnitInc(MNew - MOld);
+    }
+  }
 }

 /// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
@@ -713,10 +914,12 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
       // FIXME: allow the caller to pass in the list of vreg uses that remain
       // to be bottom-scheduled to avoid searching uses at each query.
       SlotIndex CurrIdx = getCurrSlot();
-      const LiveInterval *LI = getInterval(Reg);
-      if (LI && LI->killedAt(SlotIdx)
-          && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
-        decreaseRegPressure(Reg);
+      const LiveRange *LR = getLiveRange(Reg);
+      if (LR) {
+        LiveQueryResult LRQ = LR->Query(SlotIdx);
+        if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
+          decreaseRegPressure(Reg);
+        }
       }
     }
     else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -741,7 +944,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
 /// This assumes that the current LiveIn set is sufficient.
 void RegPressureTracker::
 getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
-                            ArrayRef<PressureElement> CriticalPSets,
+                            ArrayRef<PressureChange> CriticalPSets,
                             ArrayRef<unsigned> MaxPressureLimit) {
   // Snapshot Pressure.
   std::vector<unsigned> SavedPressure = CurrSetPressure;
@@ -749,11 +952,12 @@ getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,

   bumpDownwardPressure(MI);

-  computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI);
+  computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI,
+                             LiveThruPressure);
   computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
                           MaxPressureLimit, Delta);
-  assert(Delta.CriticalMax.UnitIncrease >= 0 &&
-         Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+  assert(Delta.CriticalMax.getUnitInc() >= 0 &&
+         Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure");

   // Restore the tracker's state.
   P.MaxSetPressure.swap(SavedMaxPressure);
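getUpwardPressureDelta replays a precomputed per-instruction diff against the tracker's current and max pressure instead of the snapshot/bump/restore dance of getMaxUpwardPressureDelta; as its comment says, this version is a prototype kept to prove the two agree. One diff entry replayed with assumed numbers:

    #include <cassert>

    int main() {
      unsigned POld = 20, MOld = 27, MaxLimit = 27;
      int UnitInc = 8;                         // this entry adds 8 units
      unsigned PNew = POld + UnitInc;          // 28
      unsigned MNew = PNew > MOld ? PNew : MOld;
      // CurrentMax fires: max pressure grew one unit past the region max.
      assert(MNew > MaxLimit && MNew - MOld == 1);
      return 0;
    }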
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index f82ccbe..75ebdaa 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -31,9 +31,8 @@ using namespace llvm;

 /// setUsed - Set the register and its sub-registers as being used.
 void RegScavenger::setUsed(unsigned Reg) {
-  RegsAvailable.reset(Reg);
-
-  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+  for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+       SubRegs.isValid(); ++SubRegs)
     RegsAvailable.reset(*SubRegs);
 }

@@ -45,8 +44,8 @@ bool RegScavenger::isAliasUsed(unsigned Reg) const {
 }

 void RegScavenger::initRegState() {
-  for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
-         IE = Scavenged.end(); I != IE; ++I) {
+  for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
+         IE = Scavenged.end(); I != IE; ++I) {
     I->Reg = 0;
     I->Restore = NULL;
   }
@@ -105,8 +104,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
 }

 void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
-  BV.set(Reg);
-  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+  for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+       SubRegs.isValid(); ++SubRegs)
     BV.set(*SubRegs);
 }

@@ -182,8 +181,8 @@ void RegScavenger::forward() {

   MachineInstr *MI = MBBI;

-  for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
-         IE = Scavenged.end(); I != IE; ++I) {
+  for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
+         IE = Scavenged.end(); I != IE; ++I) {
     if (I->Restore != MI)
       continue;

@@ -369,7 +368,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
   // Exclude all the registers being used by the instruction.
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = I->getOperand(i);
-    if (MO.isReg() && MO.getReg() != 0 &&
+    if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
         !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
       Candidates.reset(MO.getReg());
   }
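Both scavenger hunks replace a separate operation on the register followed by a loop over its sub-registers with a single self-inclusive walk, so the register and its pieces can never fall out of sync. A toy version of the same pattern; the sub-register lists here are illustrative, not a real target description:

    #include <vector>

    // Visit Reg itself, then its sub-registers, marking all unavailable;
    // this mirrors MCSubRegIterator with IncludeSelf=true.
    static void setUsed(unsigned Reg,
                        const std::vector<std::vector<unsigned> > &SubRegs,
                        std::vector<bool> &Available) {
      Available[Reg] = false;
      for (size_t i = 0; i < SubRegs[Reg].size(); ++i)
        Available[SubRegs[Reg][i]] = false;
    }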
SDep P = D; diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index e4da6a4..7f1f9c4 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -36,6 +36,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <queue> + using namespace llvm; static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, @@ -98,7 +100,7 @@ static void getUnderlyingObjects(const Value *V, SmallVector<Value *, 4> Objs; GetUnderlyingObjects(const_cast<Value *>(V), Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end(); + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { V = *I; if (!Visited.insert(V)) @@ -116,12 +118,15 @@ static void getUnderlyingObjects(const Value *V, } while (!Working.empty()); } +typedef SmallVector<PointerIntPair<const Value *, 1, bool>, 4> +UnderlyingObjectsVector; + /// getUnderlyingObjectsForInstr - If this machine instr has memory reference /// information and it can be tracked to a normal reference to a known /// object, return the Value for that object. static void getUnderlyingObjectsForInstr(const MachineInstr *MI, - const MachineFrameInfo *MFI, - SmallVectorImpl<std::pair<const Value *, bool> > &Objects) { + const MachineFrameInfo *MFI, + UnderlyingObjectsVector &Objects) { if (!MI->hasOneMemOperand() || !(*MI->memoperands_begin())->getValue() || (*MI->memoperands_begin())->isVolatile()) @@ -134,8 +139,8 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, SmallVector<Value *, 4> Objs; getUnderlyingObjects(V, Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end(); - I != IE; ++I) { + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); + I != IE; ++I) { bool MayAlias = true; V = *I; @@ -155,7 +160,7 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, return; } - Objects.push_back(std::make_pair(V, MayAlias)); + Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); } } @@ -175,14 +180,11 @@ void ScheduleDAGInstrs::finishBlock() { void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount) { + unsigned regioninstrs) { assert(bb == BB && "startBlock should set BB"); RegionBegin = begin; RegionEnd = end; - EndIndex = endcount; - MISUnitMap.clear(); - - ScheduleDAG::clearDAG(); + NumRegionInstrs = regioninstrs; } /// Close the current scheduling region. 
Don't clear any state in case the @@ -267,13 +269,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { SU->hasPhysRegDefs = true; Dep = SDep(SU, SDep::Data, *Alias); RegUse = UseSU->getInstr(); - Dep.setMinLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/true)); } Dep.setLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/false)); + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, + UseOp)); ST.adjustSchedDependency(SU, UseSU, Dep); UseSU->addPred(Dep); @@ -310,10 +309,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); else { SDep Dep(SU, Kind, /*Reg=*/*Alias); - unsigned OutLatency = - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); - Dep.setMinLatency(OutLatency); - Dep.setLatency(OutLatency); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); DefSU->addPred(Dep); } } @@ -389,10 +386,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { SUnit *DefSU = DefI->SU; if (DefSU != SU && DefSU != &ExitSU) { SDep Dep(SU, SDep::Output, Reg); - unsigned OutLatency = - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); - Dep.setMinLatency(OutLatency); - Dep.setLatency(OutLatency); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); DefSU->addPred(Dep); } DefI->SU = SU; @@ -409,9 +404,19 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); unsigned Reg = MI->getOperand(OperIdx).getReg(); + // Record this local VReg use. + VReg2UseMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, SU)); + // Lookup this operand's reaching definition. assert(LIS && "vreg dependencies requires LiveIntervals"); - LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI)); + LiveQueryResult LRQ + = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); VNInfo *VNI = LRQ.valueIn(); // VNI will be valid because MachineOperand::readsReg() is checked by caller. @@ -427,10 +432,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. int DefOp = Def->findRegisterDefOperandIdx(Reg); - dep.setLatency( - SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false)); - dep.setMinLatency( - SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true)); + dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); @@ -472,8 +474,8 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, SmallVector<Value *, 4> Objs; getUnderlyingObjects(V, Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), - IE = Objs.end(); I != IE; ++I) { + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), + IE = Objs.end(); I != IE; ++I) { V = *I; if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { @@ -642,8 +644,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, bool isNormalMemory = false) { // If this is a false dependency, // do not add the edge, but remember the rejected node.
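The hunk below drops the direct EnableAASchedMI test from addChainDependency; as the later buildSchedGraph hunk shows, the command-line flag now overrides the subtarget default only when the user actually passed it, and the result is folded into an AAForDep pointer. A rough sketch of that flag pattern (enable-foo and useFoo are made-up names, not part of the patch):

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool> EnableFoo("enable-foo", llvm::cl::Hidden,
        llvm::cl::desc("Enable foo"), llvm::cl::init(false));

    // An explicit command-line setting wins; otherwise fall back to the
    // subtarget's default, mirroring the EnableAASchedMI/ST.useAA() logic.
    static bool useFoo(bool SubtargetDefault) {
      return EnableFoo.getNumOccurrences() > 0 ? EnableFoo : SubtargetDefault;
    }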
- if (!EnableAASchedMI || - MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); SUb->addPred(Dep); @@ -671,7 +672,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, void ScheduleDAGInstrs::initSUnits() { // We'll be allocating one SUnit for each real instruction in the region, // which is contained within a basic block. - SUnits.reserve(BB->size()); + SUnits.reserve(NumRegionInstrs); for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { MachineInstr *MI = I; @@ -693,10 +694,22 @@ void ScheduleDAGInstrs::initSUnits() { /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, - RegPressureTracker *RPTracker) { + RegPressureTracker *RPTracker, + PressureDiffs *PDiffs) { + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI + : ST.useAA(); + AliasAnalysis *AAForDep = UseAA ? AA : 0; + + MISUnitMap.clear(); + ScheduleDAG::clearDAG(); + // Create an SUnit for each real instruction. initSUnits(); + if (PDiffs) + PDiffs->init(SUnits.size()); + // We build scheduling units by walking a block's instruction list from bottom // to top. @@ -722,10 +735,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Uses.setUniverse(TRI->getNumRegs()); assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); - // FIXME: Allow SparseSet to reserve space for the creation of virtual - // registers during scheduling. Don't artificially inflate the Universe - // because we want to assert that vregs are not created during DAG building. + VRegUses.clear(); VRegDefs.setUniverse(MRI.getNumVirtRegs()); + VRegUses.setUniverse(MRI.getNumVirtRegs()); // Model data dependencies between instructions being scheduled and the // ExitSU. @@ -745,17 +757,18 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, DbgMI = MI; continue; } + SUnit *SU = MISUnitMap[MI]; + assert(SU && "No SUnit mapped to this MI"); + if (RPTracker) { - RPTracker->recede(); + PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0; + RPTracker->recede(/*LiveUses=*/0, PDiff); assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); } assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && "Cannot schedule terminators or labels!"); - SUnit *SU = MISUnitMap[MI]; - assert(SU && "No SUnit mapped to this MI"); - // Add register-based dependencies (data, anti, and output). 
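On the VRegUses bookkeeping set up above: LLVM's sparse set containers require the key universe to be fixed before any insertion, which is why buildSchedGraph calls setUniverse(MRI.getNumVirtRegs()) up front. A tiny illustration with plain SparseSet (the patch itself uses a VReg2UseMap multiset keyed the same way):

    #include "llvm/ADT/SparseSet.h"

    int main() {
      llvm::SparseSet<unsigned> Seen;
      Seen.setUniverse(64); // analogous to MRI.getNumVirtRegs(); must precede insert()
      Seen.insert(5);
      Seen.insert(9);
      return Seen.count(5) ? 0 : 1;
    }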
bool HasVRegDef = false; for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { @@ -833,20 +846,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) ChainLatency = TrueMemOrderLatency; - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes, ChainLatency); } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, TrueMemOrderLatency); } adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, @@ -855,7 +868,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, AliasMemDefs.clear(); AliasMemUses.clear(); } else if (MI->mayStore()) { - SmallVector<std::pair<const Value *, bool>, 4> Objs; + UnderlyingObjectsVector Objs; getUnderlyingObjectsForInstr(MI, MFI, Objs); if (Objs.empty()) { @@ -864,10 +877,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, } bool MayAlias = false; - for (SmallVector<std::pair<const Value *, bool>, 4>::iterator - K = Objs.begin(), KE = Objs.end(); K != KE; ++K) { - const Value *V = K->first; - bool ThisMayAlias = K->second; + for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end(); + K != KE; ++K) { + const Value *V = K->getPointer(); + bool ThisMayAlias = K->getInt(); if (ThisMayAlias) MayAlias = true; @@ -879,7 +892,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MapVector<const Value *, SUnit *>::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, + 0, true); I->second = SU; } else { if (ThisMayAlias) @@ -894,7 +908,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); } @@ -903,11 +917,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); // Add dependence on alias chain, if needed. if (AliasChain) - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); // But we also should check dependent instructions for the // SU in question. 
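The UnderlyingObjectsVector change above swaps std::pair<const Value *, bool> for PointerIntPair, which hides the MayAlias bit in the pointer's spare low alignment bits. A standalone sketch of the idiom (demo is illustrative only):

    #include "llvm/ADT/PointerIntPair.h"

    // One word per entry: the bool rides in the pointer's low bits, which is
    // why the accessors are getPointer()/getInt() rather than first/second.
    typedef llvm::PointerIntPair<const int *, 1, bool> PtrBool;

    static bool demo(const int *P) {
      PtrBool V(P, true);
      return V.getInt() && V.getPointer() == P;
    }

    int main() {
      int X = 0;
      return demo(&X) ? 0 : 1;
    }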
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, @@ -929,7 +943,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { - SmallVector<std::pair<const Value *, bool>, 4> Objs; + UnderlyingObjectsVector Objs; getUnderlyingObjectsForInstr(MI, MFI, Objs); if (Objs.empty()) { @@ -937,7 +951,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // potentially aliasing stores. for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; @@ -945,10 +959,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MayAlias = false; } - for (SmallVector<std::pair<const Value *, bool>, 4>::iterator + for (UnderlyingObjectsVector::iterator J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { - const Value *V = J->first; - bool ThisMayAlias = J->second; + const Value *V = J->getPointer(); + bool ThisMayAlias = J->getInt(); if (ThisMayAlias) MayAlias = true; @@ -959,7 +973,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MapVector<const Value *, SUnit *>::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, + 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); else @@ -969,7 +984,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2e09ec0..43f72c5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -35,6 +35,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -43,6 +45,7 @@ STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); +STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt<bool> @@ -53,6 +56,14 @@ namespace { CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Include global information in alias analysis")); + /// Hidden option to stress test load slicing, i.e., when this option + /// is enabled, load slicing bypasses most of its profitability guards. 
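Much of the DAGCombiner churn from here on is the mechanical DebugLoc -> SDLoc migration: new nodes are located by wrapping the node or value they derive from. A minimal sketch of the new style against the post-3.4 API (negate is a hypothetical helper, not code from this patch):

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Post-migration style: construct an SDLoc from the SDValue whose source
    // location the new node should inherit, instead of copying its DebugLoc.
    static SDValue negate(SelectionDAG &DAG, SDValue Op) {
      EVT VT = Op.getValueType();
      return DAG.getNode(ISD::SUB, SDLoc(Op), VT,
                         DAG.getConstant(0, VT), Op);
    }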
+ static cl::opt<bool> + StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, + cl::desc("Bypass the profitability model of load " + "slicing"), + cl::init(false)); + //------------------------------ DAGCombiner ---------------------------------// class DAGCombiner { @@ -62,6 +73,7 @@ namespace { CodeGenOpt::Level OptLevel; bool LegalOperations; bool LegalTypes; + bool ForCodeSize; // Worklist of all of the nodes that need to be simplified. // @@ -144,6 +156,7 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + bool SliceUpLoad(SDNode *N); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); @@ -154,8 +167,8 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); - void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, - SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, + SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType); /// combine - call the node-specific routine that knows how to fold each @@ -246,18 +259,18 @@ namespace { SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); - SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); + SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); SDValue visitShiftByConstant(SDNode *N, unsigned Amt); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); - SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); - SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, - DebugLoc DL, bool foldBooleans = true); + SDLoc DL, bool foldBooleans = true); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); @@ -267,7 +280,7 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); - SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue TransformFPLoadStorePair(SDNode *N); @@ -279,15 +292,15 @@ namespace { /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, - SmallVector<SDValue, 8> &Aliases); + SmallVectorImpl<SDValue> &Aliases); /// isAlias - Return true if there is any possibility that the two addresses /// overlap. - bool isAlias(SDValue Ptr1, int64_t Size1, + bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, + SDValue Ptr2, int64_t Size2, bool IsVolatile2, const Value *SrcValue2, int SrcValueOffset2, unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const; @@ -299,7 +312,7 @@ namespace { /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. 
Returns true if the operand was a load. bool FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, + SDValue &Ptr, int64_t &Size, bool &IsVolatile, const Value *&SrcValue, int &SrcValueOffset, unsigned &SrcValueAlignment, const MDNode *&TBAAInfo) const; @@ -315,8 +328,15 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { + AttributeSet FnAttrs = + DAG.getMachineFunction().getFunction()->getAttributes(); + ForCodeSize = + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + } /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); @@ -326,7 +346,11 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { - return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + if (LHSTy.isVector()) + return LHSTy; + return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) + : TLI.getPointerTy(); } /// isTypeLegal - This method returns true if we are running before type @@ -335,6 +359,12 @@ namespace { if (!LegalTypes) return true; return TLI.isTypeLegal(VT); } + + /// getSetCCResultType - Convenience wrapper around + /// TargetLowering::getSetCCResultType + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(*DAG.getContext(), VT); + } }; } @@ -482,12 +512,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1), Op.getOperand(0)); @@ -501,7 +531,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return Op.getOperand(1); // fold (fneg (fsub A, B)) -> (fsub B, A) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(0)); case ISD::FMUL: @@ -512,24 +542,24 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), 
GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1)); case ISD::FP_EXTEND: case ISD::FSIN: - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1)); case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); @@ -573,7 +603,7 @@ static bool isOneUseSetCC(SDValue N) { return false; } -SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, +SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { @@ -587,7 +617,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, } if (N0.hasOneUse()) { // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorkList(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); @@ -605,7 +635,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, } if (N1.hasOneUse()) { // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); AddToWorkList(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); @@ -706,7 +736,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { } void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { - DebugLoc dl = Load->getDebugLoc(); + SDLoc dl(Load); EVT VT = Load->getValueType(0); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); @@ -725,7 +755,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) @@ -735,9 +765,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = true; return DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); } unsigned Opc = Op.getOpcode(); @@ -767,7 +795,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) return SDValue(); EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (NewOp.getNode() == 0) @@ -782,7 +810,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (NewOp.getNode() == 0) @@ -845,7 +873,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - 
DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(Opc, dl, PVT, NN0, NN1)); } @@ -892,7 +920,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); } @@ -923,7 +951,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) { // fold (aext (sext x)) -> (sext x) DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); } return SDValue(); } @@ -948,7 +976,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDNode *N = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); @@ -958,9 +986,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); DEBUG(dbgs() << "\nPromoting "; @@ -1008,7 +1034,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // try and combine it. while (!WorkListContents.empty()) { SDNode *N; - // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // The WorkListOrder holds the SDNodes in order, but it may contain + // duplicates. // In order to avoid a linear scan, we use a set (O(log N)) to hold what the // worklist *should* contain, and check that the node we want to visit should // actually be visited. @@ -1245,7 +1272,7 @@ static SDValue getInputChainForNode(SDNode *N) { if (unsigned NumOps = N->getNumOperands()) { if (N->getOperand(0).getValueType() == MVT::Other) return N->getOperand(0); - else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + if (N->getOperand(NumOps-1).getValueType() == MVT::Other) return N->getOperand(NumOps-1); for (unsigned i = 1; i < NumOps-1; ++i) if (N->getOperand(i).getValueType() == MVT::Other) @@ -1320,7 +1347,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getEntryNode(); } else { // New and improved token factor.
- Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, &Ops[0], Ops.size()); } @@ -1350,7 +1377,7 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { } static -SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, +SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1, SelectionDAG &DAG) { EVT VT = N0.getValueType(); SDValue N00 = N0.getOperand(0); @@ -1360,10 +1387,10 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && isa<ConstantSDNode>(N00.getOperand(1))) { // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) - N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, - DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT, + N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT, + DAG.getNode(ISD::SHL, SDLoc(N00), VT, N00.getOperand(0), N01), - DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT, + DAG.getNode(ISD::SHL, SDLoc(N01), VT, N00.getOperand(1), N01)); return DAG.getNode(ISD::ADD, DL, VT, N0, N1); } @@ -1400,7 +1427,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); // fold (add x, 0) -> x if (N1C && N1C->isNullValue()) return N0; @@ -1408,28 +1435,28 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() + (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A if (N1C && N0.getOpcode() == ISD::SUB) if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(N1C->getAPIntValue()+ N0C->getAPIntValue(), VT), N0.getOperand(1)); // reassociate add - SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1); + SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); if (RADD.getNode() != 0) return RADD; // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); // fold (A + (0-B)) -> A-B if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) return N1.getOperand(0); @@ -1439,18 +1466,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold (A+(B-(A+C))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(0)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1).getOperand(1)); // fold (A+(B-(C+A))) to (B-C) if (N1.getOpcode() == ISD::SUB && 
N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(1)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1).getOperand(0)); // fold (A+((B-A)+or-C)) to (B+or-C) if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && N1.getOperand(0).getOpcode() == ISD::SUB && N0 == N1.getOperand(0).getOperand(1)) - return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, N1.getOperand(0).getOperand(0), N1.getOperand(1)); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant @@ -1461,9 +1488,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N11 = N1.getOperand(1); if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, - DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10), - DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, + DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), + DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); } if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) @@ -1481,17 +1508,17 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); } } // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG); + SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG); if (Result.getNode()) return Result; } if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG); + SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG); if (Result.getNode()) return Result; } @@ -1501,8 +1528,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N1.getOperand(0).getOperand(1), N1.getOperand(1))); if (N0.getOpcode() == ISD::SHL && @@ -1510,8 +1537,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0).getOperand(1), N0.getOperand(1))); @@ -1524,7 +1551,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. 
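For the (add x, y) -> (or x, y) fold visible above (the LHSZero/RHSZero test): when the known-zero masks prove the operands share no set bits, no bit position can generate a carry, so addition and bitwise-or coincide. A quick scalar check of the identity:

    #include <cstdint>
    #include <cassert>

    int main() {
      // If X & Y == 0, no column carries, hence X + Y == X | Y.
      uint32_t X = 0xF0, Y = 0x0F;
      assert((X & Y) == 0 && (X + Y) == (X | Y));
      return 0;
    }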
if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); } } @@ -1533,7 +1560,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.getOperand(0).getValueType() == MVT::i1 && !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } @@ -1550,18 +1577,18 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // If the flag result is dead, turn this into an ADD. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), + return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); // canonicalize constant to RHS. if (N0C && !N1C) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); // fold (addc x, 0) -> x + no carry out if (N1C && N1C->isNullValue()) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; @@ -1574,9 +1601,9 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), + return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); } return SDValue(); @@ -1591,30 +1618,25 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); + return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); return SDValue(); } // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. -static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, bool LegalOperations) { - if (!VT.isVector()) { +static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, + bool LegalOperations, bool LegalTypes) { + if (!VT.isVector()) + return DAG.getConstant(0, VT); + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) return DAG.getConstant(0, VT); - } - if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { - // Produce a vector of zeros. - SDValue El = DAG.getConstant(0, VT.getVectorElementType()); - std::vector<SDValue> Ops(VT.getVectorNumElements(), El); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - &Ops[0], Ops.size()); - } return SDValue(); } @@ -1640,17 +1662,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. 
if (N0 == N1) - return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) if (N1C) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, DAG.getConstant(-N1C->getAPIntValue(), VT)); // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) if (N0C && N0C->isAllOnesValue()) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) return N1.getOperand(1); @@ -1664,7 +1686,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), VT); - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC, N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C @@ -1672,19 +1694,19 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { (N0.getOperand(1).getOpcode() == ISD::SUB || N0.getOperand(1).getOpcode() == ISD::ADD) && N0.getOperand(1).getOperand(0) == N1) - return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(1)); // fold ((A+(C+B))-B) -> A+C if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // fold ((A-(B-C))-C) -> A-B if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // If either operand of a sub is undef, the result is undef @@ -1698,7 +1720,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { // fold (sub Sym, c) -> Sym-c if (N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() - (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 @@ -1720,25 +1742,25 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { // If the flag result is dead, turn this into an SUB. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1), - DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // fold (subc x, x) -> 0 + no borrow if (N0 == N1) return CombineTo(N, DAG.getConstant(0, VT), - DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // fold (subc x, 0) -> x + no borrow if (N1C && N1C->isNullValue()) - return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + no borrow if (N0C && N0C->isAllOnesValue()) - return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0), - DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); return SDValue(); @@ -1751,63 +1773,102 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { // fold (sube x, y, false) -> (subc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1); + return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1); return SDValue(); } +/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// elements are all the same constant or undefined. +static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); + if (!C) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs) && + EltVT.getSizeInBits() >= SplatBitSize); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + bool N0IsConst = false; + bool N1IsConst = false; + APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); + N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); + } else { + N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0; + ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() + : APInt(); + N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0; + ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() + : APInt(); } - // fold (mul x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); // fold (mul c1, c2) -> c1*c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); + if (N0IsConst && N1IsConst) + return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); + // canonicalize constant to RHS - if (N0C && !N1C) - return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0); + if (N0IsConst && !N1IsConst) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (N1IsConst && ConstValue1 == 0) return N1; + // We require a splat of the entire scalar bit width for non-contiguous + // bit patterns. 
+ bool IsFullSplat = + ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); + // fold (mul x, 1) -> x + if (N1IsConst && ConstValue1 == 1 && IsFullSplat) + return N0; // fold (mul x, -1) -> 0-x - if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + if (N1IsConst && ConstValue1.isAllOnesValue()) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // fold (mul x, (1 << c)) -> x << c - if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), + if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, + DAG.getConstant(ConstValue1.logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { - unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { + unsigned Log2Val = (-ConstValue1).logBase2(); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, DAG.getConstant(Log2Val, getShiftAmountTy(N0.getValueType())))); } + + APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N1C && N0.getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N0.getOperand(1))) { - SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + if (N1IsConst && N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) { + SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); AddToWorkList(C3.getNode()); - return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } @@ -1816,7 +1877,9 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { { SDValue Sh(0,0), Y(0,0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
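Scalar models of the multiply strength reductions handled in visitMUL above, which this patch extends to splat-vector constants. Both identities are exact modulo 2^32, which is why the full-width-splat guard matters before rewriting:

    #include <cstdint>
    #include <cassert>

    int main() {
      uint32_t X = 37, Y = 11;
      unsigned C = 3;
      // (mul x, (1 << c)) -> (shl x, c)
      assert(X * (1u << C) == (X << C));
      // (mul (shl x, c), y) -> (shl (mul x, y), c)
      assert((X << C) * Y == ((X * Y) << C));
      return 0;
    }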
- if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && + if (N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1))) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && @@ -1826,24 +1889,25 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } if (Sh.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); } } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - isa<ConstantSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, - DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT, + if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), - DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT, + DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); // reassociate mul - SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1); + SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); if (RMUL.getNode() != 0) return RMUL; @@ -1871,13 +1935,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(), + return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize @@ -1892,19 +1956,19 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register - SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, DAG.getConstant(VT.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); AddToWorkList(SGN.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; - SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, + SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, DAG.getConstant(VT.getSizeInBits() - lg2, getShiftAmountTy(SGN.getValueType()))); - SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); + SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); AddToWorkList(SRL.getNode()); AddToWorkList(ADD.getNode()); // Divide by pow2 - SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, + SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. 
Otherwise, we must @@ -1913,7 +1977,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SRA; AddToWorkList(SRA.getNode()); - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); } @@ -1952,7 +2016,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); // fold (udiv x, (1 << c)) -> x >>u c if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 @@ -1960,13 +2024,13 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, + SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() .logBase2(), ADDVT)); AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); } } } @@ -2000,19 +2064,19 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); } // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1); + SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); AddToWorkList(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); AddToWorkList(Mul.getNode()); return Sub; } @@ -2040,18 +2104,18 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); // fold (urem x, pow2) -> (and x, pow2-1) if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, DAG.getConstant(N1C->getAPIntValue()-1,VT)); // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { SDValue Add = - DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, + DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); } } } @@ -2059,13 +2123,13 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. 
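A scalar model of the sdiv-by-power-of-two expansion above (SRA to splat the sign, SRL to build the bias, ADD, then SRA), together with the urem folds from the same region. sdivPow2 is an illustrative reimplementation, assuming arithmetic right shift on signed values, as the SRA node provides:

    #include <cstdint>
    #include <cassert>

    // Round-toward-zero signed division by 1 << Lg2, as the combine expands it.
    static int32_t sdivPow2(int32_t X, unsigned Lg2) {
      int32_t Sgn = X >> 31;                       // splat the sign bit (SRA)
      uint32_t Bias = (uint32_t)Sgn >> (32 - Lg2); // (1 << Lg2) - 1 if negative (SRL)
      return (int32_t)(X + (int32_t)Bias) >> Lg2;  // ADD, then SRA
    }

    int main() {
      assert(sdivPow2(-7, 1) == -3 && sdivPow2(7, 1) == 3);
      // (urem x, pow2) -> (and x, pow2-1), and the generic X - (X/C)*C lowering.
      uint32_t X = 1234, Pow2 = 16, C = 7;
      assert(X % Pow2 == (X & (Pow2 - 1)));
      assert(X % C == X - (X / C) * C);
      return 0;
    }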
if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1); + SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); AddToWorkList(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); AddToWorkList(Mul.getNode()); return Sub; } @@ -2086,14 +2150,14 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, + return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, getShiftAmountTy(N0.getValueType()))); // fold (mulhs x, undef) -> 0 @@ -2124,7 +2188,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -2166,7 +2230,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!HiExists && (!LegalOperations || TLI.isOperationLegal(LoOp, N->getValueType(0)))) { - SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->op_begin(), N->getNumOperands()); return CombineTo(N, Res, Res); } @@ -2176,7 +2240,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!LoExists && (!LegalOperations || TLI.isOperationLegal(HiOp, N->getValueType(1)))) { - SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->op_begin(), N->getNumOperands()); return CombineTo(N, Res, Res); } @@ -2187,7 +2251,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, // If the two computed results can be simplified separately, separate them. if (LoExists) { - SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->op_begin(), N->getNumOperands()); AddToWorkList(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); @@ -2198,7 +2262,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } if (HiExists) { - SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->op_begin(), N->getNumOperands()); AddToWorkList(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); @@ -2216,7 +2280,7 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { if (Res.getNode()) return Res; EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. 
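The MUL_LOHI visitors above rely on the widen-then-shift lowering named in the comment: the high half of an NxN-bit multiply is the top N bits of the 2N-bit product. A scalar check for the unsigned case:

    #include <cstdint>
    #include <cassert>

    // mulhu on i32: the high 32 bits of the 64-bit product.
    static uint32_t mulhu32(uint32_t A, uint32_t B) {
      return (uint32_t)(((uint64_t)A * B) >> 32);
    }

    int main() {
      assert(mulhu32(0x80000000u, 4u) == 2u);
      assert(mulhu32(7u, 9u) == 0u);
      return 0;
    }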
@@ -2246,7 +2310,7 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { if (Res.getNode()) return Res; EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. @@ -2275,7 +2339,7 @@ SDValue DAGCombiner::visitSMULO(SDNode *N) { // (smulo x, 2) -> (saddo x, x) if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) if (C2->getAPIntValue() == 2) - return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), N->getOperand(0), N->getOperand(0)); return SDValue(); @@ -2285,7 +2349,7 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) { // (umulo x, 2) -> (uaddo x, x) if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) if (C2->getAPIntValue() == 2) - return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N->getOperand(0), N->getOperand(0)); return SDValue(); @@ -2336,11 +2400,11 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { - SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); AddToWorkList(ORNode.getNode()); - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); } // For each of OP in SHL/SRL/SRA/AND... @@ -2350,11 +2414,11 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && N0.getOperand(1) == N1.getOperand(1)) { - SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); AddToWorkList(ORNode.getNode()); - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode, N0.getOperand(1)); } @@ -2372,7 +2436,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); EVT In1Ty = In1.getValueType(); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If both incoming values are integers, and the original types are the // same. 
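On the (smulo x, 2) -> (saddo x, x) and (umulo x, 2) -> (uaddo x, x) rewrites above: doubling and self-addition are the same operation, so the overflow flag is preserved exactly. A brute spot-check using the GCC/Clang overflow builtins (standalone, not part of the patch):

    #include <cstdint>
    #include <cassert>

    int main() {
      const int32_t Cases[] = { 0x7fffffff, 0x40000000, -0x40000001, 123, 0 };
      for (unsigned i = 0; i != sizeof(Cases) / sizeof(Cases[0]); ++i) {
        int32_t X = Cases[i], R1, R2;
        bool MulOv = __builtin_mul_overflow(X, 2, &R1); // smulo
        bool AddOv = __builtin_add_overflow(X, X, &R2); // saddo
        assert(MulOv == AddOv && (MulOv || R1 == R2));
      }
      return 0;
    }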
     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
@@ -2414,10 +2478,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
     }

     if (SameMask) {
-      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+      SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                N0.getOperand(0), N1.getOperand(0));
       AddToWorkList(Op.getNode());
-      return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
+      return DAG.getVectorShuffle(VT, SDLoc(N), Op,
                                   DAG.getUNDEF(VT), &SVN0->getMask()[0]);
     }
   }
@@ -2460,7 +2524,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
   // canonicalize constant to RHS
   if (N0C && !N1C)
-    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
+    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
   // fold (and x, -1) -> x
   if (N1C && N1C->isAllOnesValue())
     return N0;
@@ -2469,7 +2533,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
                                    APInt::getAllOnesValue(BitWidth)))
     return DAG.getConstant(0, VT);
   // reassociate and
-  SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
+  SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
   if (RAND.getNode() != 0)
     return RAND;
   // fold (and (or x, C), D) -> D if (C & D) == D
@@ -2483,7 +2547,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     APInt Mask = ~N1C->getAPIntValue();
     Mask = Mask.trunc(N0Op0.getValueSizeInBits());
     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
-      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
+      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                  N0.getValueType(), N0Op0);

       // Replace uses of the AND with uses of the Zero extend node.
@@ -2496,7 +2560,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
-  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
   // already be zero by virtue of the width of the base type of the load.
   //
@@ -2573,7 +2637,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       SDValue NewLoad(Load, 0);
       if (Load->getExtensionType() == ISD::EXTLOAD) {
         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
-                              Load->getValueType(0), Load->getDebugLoc(),
+                              Load->getValueType(0), SDLoc(Load),
                               Load->getChain(), Load->getBasePtr(),
                               Load->getOffset(), Load->getMemoryVT(),
                               Load->getMemOperand());
@@ -2604,26 +2668,39 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       LL.getValueType().isInteger()) {
     // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
     if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
-      SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+      SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                    LR.getValueType(), LL, RL);
       AddToWorkList(ORNode.getNode());
-      return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+      return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
     }
     // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
     if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
-      SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
+      SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
                                     LR.getValueType(), LL, RL);
       AddToWorkList(ANDNode.getNode());
-      return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+      return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
     }
     // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
     if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
-      SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+      SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                    LR.getValueType(), LL, RL);
       AddToWorkList(ORNode.getNode());
-      return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+      return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
     }
   }
+  // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
+  if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
+      Op0 == Op1 && LL.getValueType().isInteger() &&
+      Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
+                             cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
+                            (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+                             cast<ConstantSDNode>(RR)->isNullValue()))) {
+    SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
+                                  LL, DAG.getConstant(1, LL.getValueType()));
+    AddToWorkList(ADDNode.getNode());
+    return DAG.getSetCC(SDLoc(N), VT, ADDNode,
+                        DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
+  }
   // canonicalize equivalent to ll == rl
   if (LL == RR && LR == RL) {
     Op1 = ISD::getSetCCSwappedOperands(Op1);
@@ -2636,8 +2713,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
         (!LegalOperations ||
          (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
           TLI.isOperationLegal(ISD::SETCC,
-                               TLI.getSetCCResultType(N0.getSimpleValueType())))))
-      return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+                               getSetCCResultType(N0.getSimpleValueType())))))
+      return DAG.getSetCC(SDLoc(N), N0.getValueType(),
                           LL, LR, Result);
   }
 }
@@ -2665,11 +2742,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
                             BitWidth - MemVT.getScalarType().getSizeInBits())) &&
       ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
-    SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+    SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                      LN0->getChain(), LN0->getBasePtr(),
-                                     LN0->getPointerInfo(), MemVT,
-                                     LN0->isVolatile(), LN0->isNonTemporal(),
-                                     LN0->getAlignment());
+                                     MemVT, LN0->getMemOperand());
     AddToWorkList(N);
     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -2687,12 +2762,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
                            BitWidth - MemVT.getScalarType().getSizeInBits())) &&
       ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
-    SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
-                                     LN0->getChain(),
-                                     LN0->getBasePtr(), LN0->getPointerInfo(),
-                                     MemVT,
-                                     LN0->isVolatile(), LN0->isNonTemporal(),
-                                     LN0->getAlignment());
+    SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
+                                     LN0->getChain(), LN0->getBasePtr(),
+                                     MemVT, LN0->getMemOperand());
     AddToWorkList(N);
     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -2710,7 +2782,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
                                   ? cast<LoadSDNode>(N0.getOperand(0))
                                   : cast<LoadSDNode>(N0);
     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
-        LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
+        LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
       uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
       if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
         EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
@@ -2721,11 +2793,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
           EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;

           SDValue NewLoad =
-            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
-                           LN0->getChain(), LN0->getBasePtr(),
-                           LN0->getPointerInfo(),
-                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
-                           LN0->getAlignment());
+            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
+                           LN0->getChain(), LN0->getBasePtr(), ExtVT,
+                           LN0->getMemOperand());
           AddToWorkList(N);
           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -2748,7 +2818,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
             unsigned EVTStoreBytes = ExtVT.getStoreSize();
             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
-            NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+            NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType,
                                  NewPtr, DAG.getConstant(PtrOff, PtrType));
             Alignment = MinAlign(Alignment, PtrOff);
           }
@@ -2757,11 +2827,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
           EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;

           SDValue Load =
-            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                            LN0->getChain(), NewPtr,
                            LN0->getPointerInfo(),
                            ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
-                           Alignment);
+                           Alignment, LN0->getTBAAInfo());
           AddToWorkList(N);
           CombineTo(LN0, Load, Load.getValue(1));
           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -2786,7 +2856,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
         ADDC |= Mask;
         if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
           SDValue NewAdd =
-            DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+            DAG.getNode(ISD::ADD, SDLoc(N0), VT,
                         N0.getOperand(0), DAG.getConstant(ADDC, VT));
           CombineTo(N0.getNode(), NewAdd);
           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
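The new `(and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)` combine a few hunks up rests on a small unsigned-wraparound identity: adding 1 wraps -1 to 0 and maps 0 to 1, so exactly the two excluded values land below 2 in an unsigned comparison. A standalone sanity check of that identity (my own illustration, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // x != 0 && x != -1  <=>  (unsigned)(x + 1) >= 2
    int main() {
      for (int64_t i = -4; i <= 4; ++i) {
        int32_t x = static_cast<int32_t>(i);
        bool direct = (x != 0) && (x != -1);
        bool folded = static_cast<uint32_t>(x + 1) >= 2u;
        assert(direct == folded);
      }
      return 0;
    }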
@@ -2797,6 +2867,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     }
   }

+  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
+  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
+    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+                                       N0.getOperand(1), false);
+    if (BSwap.getNode())
+      return BSwap;
+  }
+
   return SDValue();
 }

@@ -2881,17 +2959,27 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
   if (N00 != N10)
     return SDValue();

-  // Make sure everything beyond the low halfword is zero since the SRL 16
-  // will clear the top bits.
+  // Make sure everything beyond the low halfword gets set to zero since the SRL
+  // 16 will clear the top bits.
   unsigned OpSizeInBits = VT.getSizeInBits();
-  if (DemandHighBits && OpSizeInBits > 16 &&
-      (!LookPassAnd0 || !LookPassAnd1) &&
-      !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
-    return SDValue();
+  if (DemandHighBits && OpSizeInBits > 16) {
+    // If the left-shift isn't masked out then the only way this is a bswap is
+    // if all bits beyond the low 8 are 0. In that case the entire pattern
+    // reduces to a left shift anyway: leave it for other parts of the combiner.
+    if (!LookPassAnd0)
+      return SDValue();

-  SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
+    // However, if the right shift isn't masked out then it might be because
+    // it's not needed. See if we can spot that too.
+    if (!LookPassAnd1 &&
+        !DAG.MaskedValueIsZero(
+            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
+      return SDValue();
+  }
+
+  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
   if (OpSizeInBits > 16)
-    Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
+    Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
                       DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
   return Res;
 }
@@ -2899,7 +2987,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 /// isBSwapHWordElement - Return true if the specified node is an element
 /// that makes up a 32-bit packed halfword byteswap. i.e.
 /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
-static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) {
+static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
   if (!N.getNode()->hasOneUse())
     return false;

@@ -3024,19 +3112,19 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
     return SDValue();

-  SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+  SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
                               SDValue(Parts[0],0));

-  // Result of the bswap should be rotated by 16. If it's not legal, than
+  // Result of the bswap should be rotated by 16. If it's not legal, then
   // do (x << 16) | (x >> 16).
   SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
-    return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
+    return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt);
   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
-    return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
-  return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
-                     DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
-                     DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
+    return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt);
+  return DAG.getNode(ISD::OR, SDLoc(N), VT,
+                     DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt),
+                     DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
 }
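MatchBSwapHWord recognizes the packed-halfword byteswap named in the doc comment above and emits a full bswap followed by a rotate of 16: a full byteswap turns bytes ABCD into DCBA, and rotating by 16 swaps the halves back to BADC, which is exactly bytes swapped within each halfword. A quick standalone check of that identity for 32 bits (my own illustration; __builtin_bswap32 is the GCC/Clang builtin):

    #include <cassert>
    #include <cstdint>

    static uint32_t rotl32(uint32_t v, unsigned r) {
      return (v << r) | (v >> (32 - r));   // caller keeps 0 < r < 32
    }

    int main() {
      uint32_t x = 0x11223344u;
      // Byteswap within each halfword: bytes ABCD -> BADC.
      uint32_t hword = ((x & 0x00ff00ffu) << 8) | ((x & 0xff00ff00u) >> 8);
      assert(hword == 0x22114433u);
      // Full byteswap gives DCBA; rotl 16 swaps the halves back to BADC.
      assert(hword == rotl32(__builtin_bswap32(x), 16));
      return 0;
    }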
 SDValue DAGCombiner::visitOR(SDNode *N) {
@@ -3076,7 +3164,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
     return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
   // canonicalize constant to RHS
   if (N0C && !N1C)
-    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
+    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
   // fold (or x, 0) -> x
   if (N1C && N1C->isNullValue())
     return N0;
@@ -3096,7 +3184,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
       return BSwap;

   // reassociate or
-  SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
+  SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
   if (ROR.getNode() != 0)
     return ROR;
   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
@@ -3105,8 +3193,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
       isa<ConstantSDNode>(N0.getOperand(1))) {
     ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
     if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
-      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
-                         DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+      return DAG.getNode(ISD::AND, SDLoc(N), VT,
+                         DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                      N0.getOperand(0), N1),
                          DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
   }
@@ -3121,19 +3209,19 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
       // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
       if (cast<ConstantSDNode>(LR)->isNullValue() &&
           (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
-        SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
+        SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
                                      LR.getValueType(), LL, RL);
         AddToWorkList(ORNode.getNode());
-        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+        return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
       }
       // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
       // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
       if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
           (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
-        SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
+        SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
                                       LR.getValueType(), LL, RL);
         AddToWorkList(ANDNode.getNode());
-        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+        return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
       }
     }
     // canonicalize equivalent to ll == rl
@@ -3148,8 +3236,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
           (!LegalOperations ||
            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
             TLI.isOperationLegal(ISD::SETCC,
-                                 TLI.getSetCCResultType(N0.getValueType())))))
-        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+                                 getSetCCResultType(N0.getValueType())))))
+        return DAG.getSetCC(SDLoc(N), N0.getValueType(),
                             LL, LR, Result);
     }
   }
@@ -3176,15 +3264,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
     if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
         DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
-      SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+      SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                               N0.getOperand(0), N1.getOperand(0));
-      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
+      return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
                          DAG.getConstant(LHSMask | RHSMask, VT));
     }
   }

   // See if this is some rotate idiom.
-  if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
+  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
     return SDValue(Rot, 0);

   // Simplify the operands using demanded-bits information.
@@ -3217,7 +3305,7 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
 // idioms for rotate, and if the target supports rotation instructions, generate
 // a rot[lr].
-SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
   EVT VT = LHS.getValueType();
   if (!TLI.isTypeLegal(VT)) return 0;
@@ -3292,33 +3380,9 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
   if (LHSMask.getNode() || RHSMask.getNode())
     return 0;

-  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
-  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
-  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
-      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
-    if (ConstantSDNode *SUBC =
-          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
-      if (SUBC->getAPIntValue() == OpSizeInBits) {
-        return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
-                           HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
-      }
-    }
-  }
-
-  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
-  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
-  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
-      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
-    if (ConstantSDNode *SUBC =
-          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
-      if (SUBC->getAPIntValue() == OpSizeInBits) {
-        return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
-                           HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
-      }
-    }
-  }
-
-  // Look for sign/zext/any-extended or truncate cases:
+  // If the shift amount is sign/zext/any-extended just peel it off.
+  SDValue LExtOp0 = LHSShiftAmt;
+  SDValue RExtOp0 = RHSShiftAmt;
   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
@@ -3327,37 +3391,31 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
-    SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
-    SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
-    if (RExtOp0.getOpcode() == ISD::SUB &&
-        RExtOp0.getOperand(1) == LExtOp0) {
-      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
-      //   (rotl x, y)
-      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
-      //   (rotr x, (sub 32, y))
-      if (ConstantSDNode *SUBC =
-            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
-        if (SUBC->getAPIntValue() == OpSizeInBits) {
-          return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
-                             LHSShiftArg,
-                             HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
-        }
-      }
-    } else if (LExtOp0.getOpcode() == ISD::SUB &&
-               RExtOp0 == LExtOp0.getOperand(1)) {
-      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
-      //   (rotr x, y)
-      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
-      //   (rotl x, (sub 32, y))
-      if (ConstantSDNode *SUBC =
-            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
-        if (SUBC->getAPIntValue() == OpSizeInBits) {
-          return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
-                             LHSShiftArg,
-                             HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
-        }
-      }
-    }
+    LExtOp0 = LHSShiftAmt.getOperand(0);
+    RExtOp0 = RHSShiftAmt.getOperand(0);
+  }
+
+  if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) {
+    // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+    //   (rotl x, y)
+    // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+    //   (rotr x, (sub 32, y))
+    if (ConstantSDNode *SUBC =
+          dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0)))
+      if (SUBC->getAPIntValue() == OpSizeInBits)
+        return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+                           HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+  } else if (LExtOp0.getOpcode() == ISD::SUB &&
+             RExtOp0 == LExtOp0.getOperand(1)) {
+    // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+    //   (rotr x, y)
+    // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+    //   (rotl x, (sub 32, y))
+    if (ConstantSDNode *SUBC =
+          dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0)))
+      if (SUBC->getAPIntValue() == OpSizeInBits)
+        return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
                            HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
   }

   return 0;
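MatchRotate only fires when the subtrahend equals the full bit width (SUBC == OpSizeInBits), which is what makes the shl/srl pair a genuine rotate. The underlying identity, checked standalone for 32 bits (my own illustration; y == 0 is skipped because x >> 32 is undefined in C++):

    #include <cassert>
    #include <cstdint>

    static uint32_t rotl32(uint32_t x, unsigned y) {
      return (x << (y & 31)) | (x >> ((32 - y) & 31));
    }

    int main() {
      uint32_t x = 0xDEADBEEFu;
      // (or (shl x, y), (srl x, (sub 32, y))) == (rotl x, y) for 0 < y < 32.
      for (unsigned y = 1; y < 32; ++y)
        assert(((x << y) | (x >> (32 - y))) == rotl32(x, y));
      return 0;
    }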
@@ -3396,12 +3454,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
   // canonicalize constant to RHS
   if (N0C && !N1C)
-    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
   // fold (xor x, 0) -> x
   if (N1C && N1C->isNullValue())
     return N0;
   // reassociate xor
-  SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
+  SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
   if (RXOR.getNode() != 0)
     return RXOR;

@@ -3417,9 +3475,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
       default:
         llvm_unreachable("Unhandled SetCC Equivalent!");
       case ISD::SETCC:
-        return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
+        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
       case ISD::SELECT_CC:
-        return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
+        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                                N0.getOperand(3), NotCC);
     }
   }
@@ -3430,10 +3488,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
       N0.getNode()->hasOneUse() &&
       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
     SDValue V = N0.getOperand(0);
-    V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
+    V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
                     DAG.getConstant(1, V.getValueType()));
     AddToWorkList(V.getNode());
-    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
+    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
   }

   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
@@ -3442,10 +3500,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
-      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
-      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
       AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
-      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
     }
   }
   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
@@ -3454,28 +3512,36 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
-      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
-      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
       AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
-      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
     }
   }
+  // fold (xor (and x, y), y) -> (and (not x), y)
+  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+      N0->getOperand(1) == N1) {
+    SDValue X = N0->getOperand(0);
+    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
+    AddToWorkList(NotX.getNode());
+    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
+  }
   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
   if (N1C && N0.getOpcode() == ISD::XOR) {
     ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
     if (N00C)
-      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
+      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
                          DAG.getConstant(N1C->getAPIntValue() ^
                                          N00C->getAPIntValue(), VT));
     if (N01C)
-      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
+      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
                          DAG.getConstant(N1C->getAPIntValue() ^
                                          N01C->getAPIntValue(), VT));
   }
   // fold (xor x, x) -> 0
   if (N0 == N1)
-    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
+    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
   if (N0.getOpcode() == N1.getOpcode()) {
@@ -3548,17 +3614,17 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
   }

   // Fold the constants, shifting the binop RHS by the shift amount.
-  SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
+  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                                N->getValueType(0),
                                LHS->getOperand(1), N->getOperand(1));

   // Create the new shift.
   SDValue NewShift = DAG.getNode(N->getOpcode(),
-                                 LHS->getOperand(0).getDebugLoc(),
+                                 SDLoc(LHS->getOperand(0)),
                                  VT, LHS->getOperand(0), N->getOperand(1));

   // Create the new binop.
-  return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
 }

 SDValue DAGCombiner::visitSHL(SDNode *N) {
@@ -3569,6 +3635,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   EVT VT = N0.getValueType();
   unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
   // fold (shl c1, c2) -> c1<<c2
   if (N0C && N1C)
     return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
@@ -3598,10 +3670,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
       SDValue N100 = N1.getOperand(0).getOperand(0);
       APInt TruncC = N101C->getAPIntValue();
       TruncC = TruncC.trunc(TruncVT.getSizeInBits());
-      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
-                         DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+                         DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
                                      DAG.getNode(ISD::TRUNCATE,
-                                                 N->getDebugLoc(),
+                                                 SDLoc(N),
                                                  TruncVT, N100),
                                      DAG.getConstant(TruncC, TruncVT)));
     }
@@ -3617,7 +3689,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     uint64_t c2 = N1C->getZExtValue();
     if (c1 + c2 >= OpSizeInBits)
       return DAG.getConstant(0, VT);
-    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+    return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                        DAG.getConstant(c1 + c2, N1.getValueType()));
   }

@@ -3639,13 +3711,34 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     if (c2 >= OpSizeInBits - InnerShiftSize) {
       if (c1 + c2 >= OpSizeInBits)
         return DAG.getConstant(0, VT);
-      return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
-                         DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+      return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
+                         DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
                                      N0.getOperand(0)->getOperand(0)),
                          DAG.getConstant(c1 + c2, N1.getValueType()));
     }
   }

+  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
+  // Only fold this if the inner zext has no other uses to avoid increasing
+  // the total number of instructions.
+  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+      N0.getOperand(0).getOpcode() == ISD::SRL &&
+      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+    uint64_t c1 =
+      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+    if (c1 < VT.getSizeInBits()) {
+      uint64_t c2 = N1C->getZExtValue();
+      if (c1 == c2) {
+        SDValue NewOp0 = N0.getOperand(0);
+        EVT CountVT = NewOp0.getOperand(1).getValueType();
+        SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
+                                     NewOp0, DAG.getConstant(c2, CountVT));
+        AddToWorkList(NewSHL.getNode());
+        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+      }
+    }
+  }
+
   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
   //                               (and (srl x, (sub c1, c2), MASK)
   // Only fold this if the inner shift has no other uses -- if it does, folding
@@ -3660,14 +3753,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
       SDValue Shift;
       if (c2 > c1) {
         Mask = Mask.shl(c2-c1);
-        Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+        Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                             DAG.getConstant(c2-c1, N1.getValueType()));
       } else {
         Mask = Mask.lshr(c1-c2);
-        Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+        Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                             DAG.getConstant(c1-c2, N1.getValueType()));
       }
-      return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
+      return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
                          DAG.getConstant(Mask, VT));
     }
   }
@@ -3678,7 +3771,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
                                              VT.getSizeInBits() -
                                                N1C->getZExtValue()),
                                     VT);
-    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                        HiBitsMask);
   }

@@ -3699,6 +3792,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   EVT VT = N0.getValueType();
   unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
   // fold (sra c1, c2) -> (sra c1, c2)
   if (N0C && N1C)
     return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
@@ -3724,7 +3823,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
                                    ExtVT, VT.getVectorNumElements());
     if ((!LegalOperations ||
          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
-      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                          N0.getOperand(0), DAG.getValueType(ExtVT));
   }

@@ -3733,7 +3832,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
       unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
       if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
-      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
                          DAG.getConstant(Sum, N1C->getValueType(0)));
     }
   }
@@ -3765,11 +3864,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
       SDValue Amt = DAG.getConstant(ShiftAmt,
                         getShiftAmountTy(N0.getOperand(0).getValueType()));
-      SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+      SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
                                   N0.getOperand(0), Amt);
-      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
                                   Shift);
-      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
                          N->getValueType(0), Trunc);
     }
   }
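The (shl (srl x, c1), c2) rewrite above replaces a shift pair with a single shift by |c2 - c1| plus a mask. The equivalence can be checked exhaustively at 32 bits; this standalone check is my own illustration, mirroring the Mask.shl/Mask.lshr arithmetic in the hunk:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t xs[] = { 0xDEADBEEFu, 0x12345678u, ~0u, 1u };
      for (unsigned i = 0; i < 4; ++i)
        for (unsigned c1 = 0; c1 < 32; ++c1)
          for (unsigned c2 = 0; c2 < 32; ++c2) {
            uint32_t x = xs[i];
            uint32_t ref = (x >> c1) << c2;       // the original shift pair
            uint32_t mask = ~0u << c1;            // bits the inner srl keeps
            uint32_t shifted;
            if (c2 > c1) { mask <<= (c2 - c1); shifted = x << (c2 - c1); }
            else         { mask >>= (c1 - c2); shifted = x >> (c1 - c2); }
            assert((shifted & mask) == ref);      // single shift + mask
          }
      return 0;
    }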
@@ -3785,11 +3884,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
       SDValue N100 = N1.getOperand(0).getOperand(0);
       APInt TruncC = N101C->getAPIntValue();
       TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
-      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
-                         DAG.getNode(ISD::AND, N->getDebugLoc(),
+      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
+                         DAG.getNode(ISD::AND, SDLoc(N),
                                      TruncVT,
                                      DAG.getNode(ISD::TRUNCATE,
-                                                 N->getDebugLoc(),
+                                                 SDLoc(N),
                                                  TruncVT, N100),
                                      DAG.getConstant(TruncC, TruncVT)));
     }
@@ -3812,9 +3911,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
       SDValue Amt = DAG.getConstant(LargeShiftAmt->getZExtValue() +
                                     N1C->getZExtValue(),
                         getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
-      SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+      SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
                                 N0.getOperand(0).getOperand(0), Amt);
-      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
     }
   }

@@ -3825,7 +3924,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {

   // If the sign bit is known to be zero, switch this to a SRL.
   if (DAG.SignBitIsZero(N0))
-    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

   if (N1C) {
     SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
@@ -3844,6 +3943,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   EVT VT = N0.getValueType();
   unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
   // fold (srl c1, c2) -> c1 >>u c2
   if (N0C && N1C)
     return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
@@ -3868,7 +3973,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
     uint64_t c2 = N1C->getZExtValue();
     if (c1 + c2 >= OpSizeInBits)
       return DAG.getConstant(0, VT);
-    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                        DAG.getConstant(c1 + c2, N1.getValueType()));
   }

@@ -3886,8 +3991,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
     if (c1 + OpSizeInBits == InnerShiftSize) {
       if (c1 + c2 >= InnerShiftSize)
         return DAG.getConstant(0, VT);
-      return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
-                         DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
+      return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
+                         DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
                                      N0.getOperand(0)->getOperand(0),
                                      DAG.getConstant(c1 + c2, ShiftCountVT)));
     }
@@ -3897,12 +4002,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
       N0.getValueSizeInBits() <= 64) {
     uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
-    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                        DAG.getConstant(~0ULL >> ShAmt, VT));
   }
-
-  // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
     // Shifting in all undef bits?
     EVT SmallVT = N0.getOperand(0).getValueType();
@@ -3911,11 +4015,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {

     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
       uint64_t ShiftAmt = N1C->getZExtValue();
-      SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+      SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
                                        N0.getOperand(0),
                           DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
       AddToWorkList(SmallShift.getNode());
-      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+      APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
+      return DAG.getNode(ISD::AND, SDLoc(N), VT,
+                         DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
+                         DAG.getConstant(Mask, VT));
     }
   }

@@ -3923,7 +4030,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   // bit, which is unmodified by sra.
   if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
     if (N0.getOpcode() == ISD::SRA)
-      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
   }

   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
@@ -3951,12 +4058,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       SDValue Op = N0.getOperand(0);

       if (ShAmt) {
-        Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+        Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
                   DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
         AddToWorkList(Op.getNode());
       }

-      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+      return DAG.getNode(ISD::XOR, SDLoc(N), VT,
                          Op, DAG.getConstant(1, VT));
     }
   }
@@ -3971,11 +4078,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       SDValue N100 = N1.getOperand(0).getOperand(0);
       APInt TruncC = N101C->getAPIntValue();
       TruncC = TruncC.trunc(TruncVT.getSizeInBits());
-      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
-                         DAG.getNode(ISD::AND, N->getDebugLoc(),
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
+                         DAG.getNode(ISD::AND, SDLoc(N),
                                      TruncVT,
                                      DAG.getNode(ISD::TRUNCATE,
-                                                 N->getDebugLoc(),
+                                                 SDLoc(N),
                                                  TruncVT, N100),
                                      DAG.getConstant(TruncC, TruncVT)));
     }
@@ -4035,7 +4142,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {

   // fold (ctlz c1) -> c2
   if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
+    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
   return SDValue();
 }

@@ -4045,7 +4152,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {

   // fold (ctlz_zero_undef c1) -> c2
   if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   return SDValue();
 }

@@ -4055,7 +4162,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {

   // fold (cttz c1) -> c2
   if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
+    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
   return SDValue();
 }

@@ -4065,7 +4172,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {

   // fold (cttz_zero_undef c1) -> c2
   if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   return SDValue();
 }

@@ -4075,7 +4182,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {

   // fold (ctpop c1) -> c2
   if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
+    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
   return SDValue();
 }
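The visitSRL change above adds an explicit mask: any_extend leaves the wide value's high bits undefined, and once the srl is moved inside the extension those undefined bits are no longer guaranteed to be shifted out. A small model of why the mask restores the wide srl's zero guarantee (my own illustration, modelling "undef" as arbitrary junk):

    #include <cassert>
    #include <cstdint>

    // Model any_extend i16 -> i32 as "the high half may be anything".
    static uint32_t any_extend16(uint16_t v, uint16_t junk) {
      return (static_cast<uint32_t>(junk) << 16) | v;
    }

    int main() {
      const uint16_t x = 0xBEEF;
      const unsigned c = 4;
      const uint16_t junks[] = { 0x0000, 0xFFFF, 0x1234 };
      for (unsigned i = 0; i < 3; ++i) {
        // (anyextend (srl x, c)): junk sits above bit 15, untouched by srl.
        uint32_t raw = any_extend16(static_cast<uint16_t>(x >> c), junks[i]);
        // The mask the combine adds: all-ones lshr'd by the shift amount.
        uint32_t masked = raw & (~0u >> c);
        // A genuine wide srl shifts zeros into the top c bits; masking gets
        // that guarantee back regardless of the junk.
        assert((masked >> (32 - c)) == 0);
        assert((masked & 0x0FFFu) == static_cast<uint32_t>(x >> c));
      }
      return 0;
    }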
@@ -4100,7 +4207,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
     return N2;
   // fold (select C, 1, X) -> (or C, X)
   if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
-    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
   // fold (select C, 0, 1) -> (xor C, 1)
   if (VT.isInteger() &&
       (VT0 == MVT::i1 ||
@@ -4110,38 +4217,38 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
       N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
     SDValue XORNode;
     if (VT == VT0)
-      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
+      return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
                          N0, DAG.getConstant(1, VT0));
-    XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
+    XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
                           N0, DAG.getConstant(1, VT0));
     AddToWorkList(XORNode.getNode());
     if (VT.bitsGT(VT0))
-      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
-    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
+      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
+    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
   }
   // fold (select C, 0, X) -> (and (not C), X)
   if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
-    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
     AddToWorkList(NOTNode.getNode());
-    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
+    return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
   }
   // fold (select C, X, 1) -> (or (not C), X)
   if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
-    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
     AddToWorkList(NOTNode.getNode());
-    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
+    return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
   }
   // fold (select C, X, 0) -> (and C, X)
   if (VT == MVT::i1 && N2C && N2C->isNullValue())
-    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
   // fold (select X, X, Y) -> (or X, Y)
   // fold (select X, 1, Y) -> (or X, Y)
   if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
-    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
   // fold (select X, Y, X) -> (and X, Y)
   // fold (select X, Y, 0) -> (and X, Y)
   if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
-    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);

   // If we can fold this based on the true/false value, do so.
   if (SimplifySelectOps(N, N1, N2))
@@ -4155,20 +4262,37 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
   // about, since there is no way to mark an opcode illegal at all value types
     if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
-      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                          N0.getOperand(0), N0.getOperand(1),
                          N1, N2, N0.getOperand(2));
-    return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+    return SimplifySelect(SDLoc(N), N0, N1, N2);
   }

   return SDValue();
 }
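The i1 select folds above are plain Boolean algebra. An exhaustive check over the two-value domain (my own illustration):

    #include <cassert>

    int main() {
      for (int c = 0; c <= 1; ++c)
        for (int x = 0; x <= 1; ++x) {
          bool C = c, X = x;
          assert((C ? true  : X) == (C | X));   // (select C, 1, X) -> (or C, X)
          assert((C ? false : X) == (!C & X));  // (select C, 0, X) -> (and (not C), X)
          assert((C ? X : true ) == (!C | X));  // (select C, X, 1) -> (or (not C), X)
          assert((C ? X : false) == (C & X));   // (select C, X, 0) -> (and C, X)
        }
      return 0;
    }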
+static
+std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  EVT LoVT, HiVT;
+  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+  // Split the inputs.
+  SDValue Lo, Hi, LL, LH, RL, RH;
+  llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+  llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+
+  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+
+  return std::make_pair(Lo, Hi);
+}
+
 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   SDValue N2 = N->getOperand(2);
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);

   // Canonicalize integer abs.
   // vselect (setg[te] X,  0),  X, -X ->
@@ -4201,6 +4325,34 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
     }
   }

+  // If the VSELECT result requires splitting and the mask is provided by a
+  // SETCC, then split both nodes and its operands before legalization. This
+  // prevents the type legalizer from unrolling SETCC into scalar comparisons
+  // and enables future optimizations (e.g. min/max pattern matching on X86).
+  if (N0.getOpcode() == ISD::SETCC) {
+    EVT VT = N->getValueType(0);
+
+    // Check if any splitting is required.
+    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+        TargetLowering::TypeSplitVector)
+      return SDValue();
+
+    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
+    llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
+    llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
+    llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
+
+    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
+    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
+
+    // Add the new VSELECT nodes to the work list in case they need to be split
+    // again.
+    AddToWorkList(Lo.getNode());
+    AddToWorkList(Hi.getNode());
+
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+  }
+
   return SDValue();
 }
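The split above is sound because vselect acts lanewise: selecting over two halves and concatenating produces the same lanes as selecting over the whole vector. A scalar model of that property (my own illustration, not LLVM API code):

    #include <array>
    #include <cassert>
    #include <cstddef>

    // Lanewise select over an N-lane "vector".
    template <std::size_t N>
    std::array<int, N> vselect(const std::array<bool, N> &m,
                               const std::array<int, N> &a,
                               const std::array<int, N> &b) {
      std::array<int, N> r = std::array<int, N>();
      for (std::size_t i = 0; i < N; ++i)
        r[i] = m[i] ? a[i] : b[i];
      return r;
    }

    int main() {
      std::array<bool, 4> m = {{true, false, false, true}};
      std::array<int, 4> a = {{1, 2, 3, 4}}, b = {{5, 6, 7, 8}};
      std::array<int, 4> whole = vselect(m, a, b);

      // Split every operand in half, select each half, then concatenate.
      std::array<bool, 2> mLo = {{m[0], m[1]}}, mHi = {{m[2], m[3]}};
      std::array<int, 2> aLo = {{a[0], a[1]}}, aHi = {{a[2], a[3]}};
      std::array<int, 2> bLo = {{b[0], b[1]}}, bHi = {{b[2], b[3]}};
      std::array<int, 2> lo = vselect(mLo, aLo, bLo);
      std::array<int, 2> hi = vselect(mHi, aHi, bHi);
      for (int i = 0; i < 2; ++i) {
        assert(whole[i] == lo[i]);
        assert(whole[i + 2] == hi[i]);
      }
      return 0;
    }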
@@ -4217,35 +4369,37 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
     return N2;

   // Determine if the condition we're dealing with is constant
-  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
-                              N0, N1, CC, N->getDebugLoc(), false);
-  if (SCC.getNode()) AddToWorkList(SCC.getNode());
+  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
+                              N0, N1, CC, SDLoc(N), false);
+  if (SCC.getNode()) {
+    AddToWorkList(SCC.getNode());

-  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
-    if (!SCCC->isNullValue())
-      return N2;    // cond always true -> true val
-    else
-      return N3;    // cond always false -> false val
-  }
+    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
+      if (!SCCC->isNullValue())
+        return N2;    // cond always true -> true val
+      else
+        return N3;    // cond always false -> false val
+    }

-  // Fold to a simpler select_cc
-  if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
-    return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
-                       SCC.getOperand(0), SCC.getOperand(1), N2, N3,
-                       SCC.getOperand(2));
+    // Fold to a simpler select_cc
+    if (SCC.getOpcode() == ISD::SETCC)
+      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
+                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+                         SCC.getOperand(2));
+  }

   // If we can fold this based on the true/false value, do so.
   if (SimplifySelectOps(N, N2, N3))
     return SDValue(N, 0);  // Don't revisit N.

   // fold select_cc into other things, such as min/max/abs
-  return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
 }

 SDValue DAGCombiner::visitSETCC(SDNode *N) {
   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
-                       N->getDebugLoc());
+                       SDLoc(N));
 }

 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
@@ -4254,7 +4408,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
 // mentioned transformation is profitable.
 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
                                     unsigned ExtOpc,
-                                    SmallVector<SDNode*, 4> &ExtendNodes,
+                                    SmallVectorImpl<SDNode *> &ExtendNodes,
                                     const TargetLowering &TLI) {
   bool HasCopyToRegUses = false;
   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
@@ -4312,8 +4466,8 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
   return true;
 }

-void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
-                                  SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
+                                  SDValue Trunc, SDValue ExtLoad, SDLoc DL,
                                   ISD::NodeType ExtType) {
   // Extend SetCC uses if necessary.
   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
@@ -4340,12 +4494,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {

   // fold (sext c1) -> c1
   if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0);

   // fold (sext (sext x)) -> (sext x)
   // fold (sext (aext x)) -> (sext x)
   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
-    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
                        N0.getOperand(0));

   if (N0.getOpcode() == ISD::TRUNCATE) {
@@ -4379,22 +4533,22 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
       // bits, just sext from i32.
       if (NumSignBits > OpBits-MidBits)
-        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
     } else {
       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
       // bits, just truncate to i32.
       if (NumSignBits > OpBits-MidBits)
-        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
     }

     // fold (sext (truncate x)) -> (sextinreg x).
     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                  N0.getValueType())) {
       if (OpBits < DestBits)
-        Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
+        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
       else if (OpBits > DestBits)
-        Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
-      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
+        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
                          DAG.getValueType(N0.getValueType()));
     }
   }
@@ -4412,17 +4566,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
     if (DoXform) {
       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                        LN0->getChain(),
-                                       LN0->getBasePtr(), LN0->getPointerInfo(),
-                                       N0.getValueType(),
-                                       LN0->isVolatile(), LN0->isNonTemporal(),
-                                       LN0->getAlignment());
+                                       LN0->getBasePtr(), N0.getValueType(),
+                                       LN0->getMemOperand());
       CombineTo(N, ExtLoad);
-      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                   N0.getValueType(), ExtLoad);
       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                       ISD::SIGN_EXTEND);
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
@@ -4436,15 +4588,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     EVT MemVT = LN0->getMemoryVT();
     if ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
-      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                        LN0->getChain(),
-                                       LN0->getBasePtr(), LN0->getPointerInfo(),
-                                       MemVT,
-                                       LN0->isVolatile(), LN0->isNonTemporal(),
-                                       LN0->getAlignment());
+                                       LN0->getBasePtr(), MemVT,
+                                       LN0->getMemOperand());
       CombineTo(N, ExtLoad);
       CombineTo(N0.getNode(),
-                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                             N0.getValueType(), ExtLoad),
                 ExtLoad.getValue(1));
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -4467,23 +4617,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
       DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                         SetCCs, TLI);
     if (DoXform) {
-      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT,
+      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                        LN0->getChain(), LN0->getBasePtr(),
-                                       LN0->getPointerInfo(), LN0->getMemoryVT(),
-                                       LN0->isVolatile(),
-                                       LN0->isNonTemporal(),
-                                       LN0->getAlignment());
+                                       LN0->getMemOperand());
       APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
       Mask = Mask.sext(VT.getSizeInBits());
-      SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+      SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                 ExtLoad, DAG.getConstant(Mask, VT));
       SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
-                                  N0.getOperand(0).getDebugLoc(),
+                                  SDLoc(N0.getOperand(0)),
                                   N0.getOperand(0).getValueType(), ExtLoad);
       CombineTo(N, And);
       CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
-      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                       ISD::SIGN_EXTEND);
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
@@ -4494,13 +4641,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
     // Only do this before legalize for now.
     if (VT.isVector() && !LegalOperations &&
-        TLI.getBooleanContents(true) == 
+        TLI.getBooleanContents(true) ==
           TargetLowering::ZeroOrNegativeOneBooleanContent) {
       EVT N0VT = N0.getOperand(0).getValueType();
       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
       // of the same size as the compared operands. Only optimize sext(setcc())
       // if this is the case.
-      EVT SVT = TLI.getSetCCResultType(N0VT);
+      EVT SVT = getSetCCResultType(N0VT);

       // We know that the # elements of the results is the same as the
       // # elements of the compare (and the # elements of the compare result
@@ -4508,24 +4655,19 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
       // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
-        return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
       // If the desired elements are smaller or larger than the source
       // elements we can use a matching integer vector type and then
       // truncate/sign extend
-      EVT MatchingElementType =
-        EVT::getIntegerVT(*DAG.getContext(),
-                          N0VT.getScalarType().getSizeInBits());
-      EVT MatchingVectorType =
-        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
-                         N0VT.getVectorNumElements());
-
+      EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
       if (SVT == MatchingVectorType) {
-        SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+        SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
                                       N0.getOperand(0), N0.getOperand(1),
                                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
-        return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
       }
     }

@@ -4534,24 +4676,26 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     SDValue NegOne =
       DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
     SDValue SCC =
-      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                        NegOne, DAG.getConstant(0, VT),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
     if (SCC.getNode()) return SCC;
-    if (!VT.isVector() && (!LegalOperations ||
-        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))))
-      return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
-                         DAG.getSetCC(N->getDebugLoc(),
-                                      TLI.getSetCCResultType(VT),
-                                      N0.getOperand(0), N0.getOperand(1),
-                                      cast<CondCodeSDNode>(N0.getOperand(2))->get()),
-                         NegOne, DAG.getConstant(0, VT));
+
+    if (!VT.isVector() &&
+        (!LegalOperations ||
+         TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) {
+      return DAG.getSelect(SDLoc(N), VT,
+                           DAG.getSetCC(SDLoc(N),
+                                        getSetCCResultType(VT),
+                                        N0.getOperand(0), N0.getOperand(1),
+                                        cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+                           NegOne, DAG.getConstant(0, VT));
+    }
   }
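The NegOne path above exploits that sign-extending a 1-bit comparison result yields an all-ones or all-zero mask, i.e. the select_cc x, y, -1, 0 form. A scalar spot-check (my own illustration):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t pairs[][2] = { {3, 5}, {5, 3}, {7, 7} };
      for (unsigned i = 0; i < 3; ++i) {
        int32_t x = pairs[i][0], y = pairs[i][1];
        // sext of the 1-bit comparison result to 32 bits...
        int32_t mask = -static_cast<int32_t>(x < y);
        // ...is exactly the (select_cc x, y, -1, 0) the combiner emits.
        assert(mask == ((x < y) ? -1 : 0));
      }
      return 0;
    }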
   // fold (sext x) -> (zext x) if the sign bit is known zero.
   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
       DAG.SignBitIsZero(N0))
-    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);

   return SDValue();
 }
@@ -4600,11 +4744,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {

   // fold (zext c1) -> c1
   if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
   // fold (zext (zext x)) -> (zext x)
   // fold (zext (aext x)) -> (zext x)
   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
-    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                        N0.getOperand(0));

   // fold (zext (truncate x)) -> (zext x) or
@@ -4623,9 +4767,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                                      VT.getSizeInBits()));
     if (TruncatedBits == (KnownZero & TruncatedBits)) {
       if (VT.bitsGT(Op.getValueType()))
-        return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
+        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
       if (VT.bitsLT(Op.getValueType()))
-        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
       return Op;
     }
@@ -4665,13 +4809,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     SDValue Op = N0.getOperand(0);
     if (Op.getValueType().bitsLT(VT)) {
-      Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+      Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
       AddToWorkList(Op.getNode());
     } else if (Op.getValueType().bitsGT(VT)) {
-      Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+      Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
       AddToWorkList(Op.getNode());
     }
-    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+    return DAG.getZeroExtendInReg(Op, SDLoc(N),
                                   N0.getValueType().getScalarType());
   }

@@ -4685,13 +4829,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
       !TLI.isZExtFree(N0.getValueType(), VT))) {
     SDValue X = N0.getOperand(0).getOperand(0);
     if (X.getValueType().bitsLT(VT)) {
-      X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
+      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
     } else if (X.getValueType().bitsGT(VT)) {
-      X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+      X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
     }
     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
     Mask = Mask.zext(VT.getSizeInBits());
-    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                        X, DAG.getConstant(Mask, VT));
   }

@@ -4708,18 +4852,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
     if (DoXform) {
       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                        LN0->getChain(),
-                                       LN0->getBasePtr(), LN0->getPointerInfo(),
-                                       N0.getValueType(),
-                                       LN0->isVolatile(), LN0->isNonTemporal(),
-                                       LN0->getAlignment());
+                                       LN0->getBasePtr(), N0.getValueType(),
+                                       LN0->getMemOperand());
       CombineTo(N, ExtLoad);
-      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                   N0.getValueType(), ExtLoad);
       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                       ISD::ZERO_EXTEND);
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
doesn't get rechecked! } @@ -4741,23 +4883,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getMemoryVT(), - LN0->isVolatile(), - LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ExtLoad, DAG.getConstant(Mask, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, - N0.getOperand(0).getDebugLoc(), + SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4772,15 +4911,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4801,11 +4938,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. 
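// Editorial illustration (not part of the upstream patch): on a target with
// ZeroOrNegativeOneBooleanContent, a vector SETCC materializes each lane as
// all-ones or all-zeros, e.g. with <4 x i32> operands
//   setcc -> {-1, 0, -1, 0}
// so masking with a splat of 1 yields exactly the zero-extended booleans
//   and   -> { 1, 0,  1, 0}
// which is why the code below needs only an AND with a BUILD_VECTOR of ones
// once the compare and result element sizes agree.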
- return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &OneOps[0], OneOps.size())); // If the desired elements are smaller or larger than the source @@ -4818,18 +4955,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), - DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &OneOps[0], OneOps.size())); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDValue SCC = - SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, VT), DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -4852,7 +4989,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return SDValue(); } - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Ensure that the shift amount is wide enough for the shifted value. if (VT.getSizeInBits() >= 256) @@ -4872,14 +5009,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext c1) -> c1 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0); // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND) - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) @@ -4902,8 +5039,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (TruncOp.getValueType() == VT) return TruncOp; // x iff x size == zext size. 
if (TruncOp.getValueType().bitsGT(VT)) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp); } // Fold (aext (and (trunc x), cst)) -> (and x, cst) @@ -4915,13 +5052,13 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.getValueType())) { SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X); } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, X, DAG.getConstant(Mask, VT)); } @@ -4938,17 +5075,15 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ANY_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4962,14 +5097,12 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4986,7 +5119,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. 
if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source @@ -5000,16 +5133,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); } } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDValue SCC = - SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, VT), DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) @@ -5030,9 +5163,8 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { assert(CV != 0 && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; - if (NewVal != CVal) { + if (NewVal != CVal) return DAG.getConstant(NewVal, V.getValueType()); - } break; } case ISD::OR: @@ -5056,7 +5188,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { APInt NewMask = Mask << Amt; SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); if (SimplifyLHS.getNode()) - return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(), + return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } } @@ -5160,12 +5292,19 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // For the transform to be legal, the load must produce only two values // (the value loaded and the chain). Don't transform a pre-increment - // load, for example, which produces an extra value. Otherwise the + // load, for example, which produces an extra value. Otherwise the // transformation is not equivalent, and the downstream logic to replace // uses gets things wrong. if (LN0->getNumValues() > 2) return SDValue(); + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. 
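// Editorial example with illustrative widths (not from the patch): shrinking
//   (i32 (trunc (srl (i64 (sextload i16, p)), 16)))
// would read bits [16,48) directly from memory, yet the in-memory object is
// only 16 bits wide; everything above bit 15 exists only because of the sign
// extension. That is exactly the MemoryVT bits (16) < ExtVT bits (32) +
// ShAmt (16) situation the added check below rejects.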
+ if (LN0->getExtensionType() != ISD::NON_EXTLOAD && + LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) + return SDValue(); + EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) @@ -5182,22 +5321,22 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); - SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), + SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, LN0->getBasePtr(), DAG.getConstant(PtrOff, PtrType)); AddToWorkList(NewPtr.getNode()); SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign); + LN0->isInvariant(), NewAlign, LN0->getTBAAInfo()); else - Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, + Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - NewAlign); + NewAlign, LN0->getTBAAInfo()); // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); @@ -5216,7 +5355,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (ShLeftAmt >= VT.getSizeInBits()) Result = DAG.getConstant(0, VT); else - Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, + Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT, Result, DAG.getConstant(ShLeftAmt, ShImmTy)); } @@ -5234,7 +5373,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg c1) -> c1 if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) @@ -5242,10 +5381,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), N1); - } // fold (sext_in_reg (sext x)) -> (sext x) // fold (sext_in_reg (aext x)) -> (sext x) @@ -5254,12 +5392,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N00 = N0.getOperand(0); if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) - return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT); + return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT); // fold operands of sext_in_reg based on knowledge that the top bits are not // demanded. @@ -5282,7 +5420,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // extended enough. 
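// Editorial worked example (not from the patch): with VT = i32 and EVT = i8,
// (sext_in_reg (srl X, 24), i8) always becomes (sra X, 24), because
// VTBits - (ShAmt + EVTBits) = 32 - (24 + 8) = 0 and every value has at
// least one sign bit. For smaller shift amounts the fold is only safe when
// ComputeNumSignBits shows the source was already extended enough.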
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) - return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1)); } } @@ -5294,12 +5432,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); AddToWorkList(ExtLoad.getNode()); @@ -5312,12 +5448,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -5328,7 +5462,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false); if (BSwap.getNode() != 0) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } @@ -5345,21 +5479,21 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0; // fold (truncate c1) -> c1 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // fold (truncate (ext x)) -> (ext x) or (truncate x) or x if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { if (N0.getOperand(0).getValueType().bitsLT(VT)) // if the source is smaller than the dest, we still need an extend - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); if (N0.getOperand(0).getValueType().bitsGT(VT)) // if the source is larger than the dest, than we just need the truncate - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // if the source and dest are the same type, we can drop both the extend // and the truncate. 
return N0.getOperand(0); @@ -5391,14 +5525,14 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - EVT IndexTy = N0->getOperand(1).getValueType(); + EVT IndexTy = TLI.getVectorIdxTy(); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); - SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, N0.getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - N->getDebugLoc(), TrTy, V, + SDLoc(N), TrTy, V, DAG.getConstant(Index, IndexTy)); } } @@ -5430,7 +5564,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0], + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0], Opnds.size()); } } @@ -5445,7 +5579,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits())); if (Shorter.getNode()) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) @@ -5488,11 +5622,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { Opnds.push_back(DAG.getUNDEF(VTs[i])); continue; } - SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V); + SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); AddToWorkList(NV.getNode()); Opnds.push_back(NV); } - return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, &Opnds[0], Opnds.size()); } } @@ -5538,7 +5672,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), false, false, false, Align); } @@ -5575,7 +5709,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { - SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0); + SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); if (Res.getNode() != N) { if (!LegalOperations || TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) @@ -5592,7 +5726,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) @@ -5600,20 +5734,22 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. 
!cast<LoadSDNode>(N0)->isVolatile() && - (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && + TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); unsigned Align = TLI.getDataLayout()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { - SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), + SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), OrigAlign); + LN0->isInvariant(), OrigAlign, + LN0->getTBAAInfo()); AddToWorkList(N); CombineTo(N0.getNode(), - DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + DAG.getNode(ISD::BITCAST, SDLoc(N0), N0.getValueType(), Load), Load.getValue(1)); return Load; @@ -5623,20 +5759,20 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. - if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || - (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector() && !N0.getValueType().isVector()) { - SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, + SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); AddToWorkList(NewConv.getNode()); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + return DAG.getNode(ISD::XOR, SDLoc(N), VT, NewConv, DAG.getConstant(SignBit, VT)); assert(N0.getOpcode() == ISD::FABS); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, NewConv, DAG.getConstant(~SignBit, VT)); } @@ -5650,38 +5786,38 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { - SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), IntXVT, N0.getOperand(1)); AddToWorkList(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. unsigned VTWidth = VT.getSizeInBits(); if (OrigXWidth < VTWidth) { - X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); AddToWorkList(X.getNode()); } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. 
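// Editorial example widths (not from the patch): pushing copysign through
// the integers for an f32 result with an f64 sign source bitcasts the source
// to i64, where the sign sits in bit 63; shifting right by
// OrigXWidth - VTWidth = 64 - 32 = 32 moves it to bit 31, so the truncate to
// i32 below lands it in the f32 sign-bit position before the final AND/OR.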
- X = DAG.getNode(ISD::SRL, X.getDebugLoc(), + X = DAG.getNode(ISD::SRL, SDLoc(X), X.getValueType(), X, DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); AddToWorkList(X.getNode()); - X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); AddToWorkList(X.getNode()); } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); - X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, + X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, VT)); AddToWorkList(X.getNode()); - SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); - Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, + Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, VT)); AddToWorkList(Cst.getNode()); - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); + return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); } } @@ -5722,8 +5858,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Due to the FP element handling below calling this routine recursively, // we can end up with a scalar-to-vector node here. if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, - DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, + DAG.getNode(ISD::BITCAST, SDLoc(BV), DstEltVT, BV->getOperand(0))); SmallVector<SDValue, 8> Ops; @@ -5732,12 +5868,12 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // If the vector element type is not legal, the BUILD_VECTOR operands // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) - Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); - Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); + Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5794,7 +5930,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5821,7 +5957,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) // Simply turn this into a SCALAR_TO_VECTOR of the new type. 
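// (Editorial illustration, not from the patch: splitting one i32 element
// 0x0000002A into four i8 elements gives {0x2A, 0, 0, 0} on a little-endian
// target; when the input was a SCALAR_TO_VECTOR and the first i8 chunk
// zero-extends back to the whole original value, a SCALAR_TO_VECTOR of that
// low chunk is sufficient, which is the case handled here.)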
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, Ops[0]); OpVal = OpVal.lshr(DstBitSize); } @@ -5831,7 +5967,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5850,10 +5986,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); // fold (fadd A, 0) -> A if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -5861,20 +5997,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(1), N1)); // No FP constant should be created after legalization as Instruction @@ -5883,22 +6019,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // We don't need test this condition for transformation like following, as // the DAG being transformed implies it is legal to take FP constant as // operand. - // + // // (fadd (fmul c, x), x) -> (fmul c+1, x) - // + // bool AllowNewFpConst = (Level < AfterLegalizeDAG); // If allow, fold (fadd (fneg x), x) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { + N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, VT); - } // If allow, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) { + N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) return DAG.getConstantFP(0.0, VT); - } // In unsafe math mode, we can fold chains of FADD's of the same value // into multiplications. 
This transform is not safe in general because @@ -5910,43 +6044,43 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - // (fadd (fmul c, x), x) -> (fmul c+1, x) + // (fadd (fmul c, x), x) -> (fmul x, c+1) if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP00, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); } - // (fadd (fmul x, c), x) -> (fmul c+1, x) + // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP01, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2) if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(1) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP00, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(1), NewCFP); } - // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP01, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), NewCFP); } } @@ -5955,98 +6089,93 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); - // (fadd x, (fmul c, x)) -> (fmul c+1, x) + // (fadd x, (fmul c, x)) -> (fmul x, c+1) if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP10, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); } - // (fadd x, (fmul x, c)) -> (fmul c+1, x) + // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); } - // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) - if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(1) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2) + if (CFP10 
&& !CFP11 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(1) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP10, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N0.getOperand(1), NewCFP); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N1.getOperand(1), NewCFP); } - // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) - if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) + if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N0.getOperand(0), NewCFP); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N1.getOperand(0), NewCFP); } } if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); - // (fadd (fadd x, x), x) -> (fmul 3.0, x) + // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + (N0.getOperand(0) == N1)) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, DAG.getConstantFP(3.0, VT)); - } } if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); - // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, DAG.getConstantFP(3.0, VT)); - } } - // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) + // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) if (AllowNewFpConst && N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0) == N1.getOperand(0)) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstantFP(4.0, VT)); - } } // FADD -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1); - } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. 
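// (Editorial background, not from the patch: an FMA computes x*y + z with a
// single rounding, so this rewrite can change results -- in f32,
// 1e8f * 1e8f - 1e16f is 0.0f when the product is rounded before the
// subtraction but nonzero under fused semantics -- which is why these
// combines are gated behind AllowFPOpFusion == Fast or UnsafeFPMath above.)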
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); - } } return SDValue(); @@ -6058,7 +6187,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // fold vector ops if (VT.isVector()) { @@ -6068,7 +6197,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); // fold (fsub A, 0) -> A if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -6101,8 +6230,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N11, DAG, LegalOperations); - else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, - &DAG.getTarget().Options)) + + if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) return GetNegatedExpression(N10, DAG, LegalOperations); } } @@ -6110,27 +6240,25 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // FSUB -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, dl, VT, N1)); - } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. 
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), N1.getOperand(1), N0); - } - // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) - if (N0.getOpcode() == ISD::FNEG && + // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0).getOperand(0); @@ -6160,10 +6288,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); // fold (fmul A, 0) -> 0 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -6177,21 +6305,21 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return N0; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
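// (Editorial note, not from the patch: isNegatibleForFree returns 0 when
// negating the expression would cost extra instructions, 1 when the negated
// form is free, and 2 when it is strictly cheaper -- for instance because an
// existing FNEG disappears -- so requiring a 2 on either side keeps the
// rewritten FMUL at or below the cost of the original.)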
if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -6201,8 +6329,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(1), N1)); return SDValue(); @@ -6215,7 +6343,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (DAG.getTarget().Options.UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) @@ -6224,13 +6352,13 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return N2; } if (N0CFP && N0CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && @@ -6267,21 +6395,17 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } // (fma x, c, x) -> (fmul x, (c+1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(1.0, VT))); - } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(-1.0, VT))); - } return SDValue(); @@ -6303,7 +6427,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { @@ -6320,7 +6444,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, DAG.getConstantFP(Recip, VT)); } @@ -6332,7 +6456,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -6350,7 +6474,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); return SDValue(); } @@ -6363,7 +6487,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { EVT VT = N->getValueType(0); if (N0CFP && N1CFP) // Constant fold - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); if (N1CFP) { const APFloat& V = N1CFP->getValueAPF(); @@ -6371,11 +6495,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) if (!V.isNegative()) { if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); } else { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, - DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0)); + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, + DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); } } @@ -6384,22 +6508,22 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(copysign(x,z), y) -> copysign(x, y) if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); // copysign(x, abs(y)) -> abs(x) if (N1.getOpcode() == ISD::FABS) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // copysign(x, copysign(y,z)) -> copysign(x, z) if (N1.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); return SDValue(); @@ -6416,7 +6540,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) - return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, // but UINT_TO_FP is legal on this target, try to convert. @@ -6424,7 +6548,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to UINT_TO_FP. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); } // The next optimizations are desireable only if SELECT_CC can be lowered. 
@@ -6442,7 +6566,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> @@ -6455,7 +6579,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } } @@ -6473,7 +6597,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) - return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, // but SINT_TO_FP is legal on this target, try to convert. @@ -6481,7 +6605,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to SINT_TO_FP. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } // The next optimizations are desireable only if SELECT_CC can be lowered. @@ -6499,7 +6623,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } } @@ -6513,7 +6637,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { // fold (fp_to_sint c1fp) -> c1 if (N0CFP) - return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); return SDValue(); } @@ -6525,7 +6649,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { // fold (fp_to_uint c1fp) -> c1 if (N0CFP) - return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); return SDValue(); } @@ -6538,7 +6662,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round c1fp) -> c1fp if (N0CFP) - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); // fold (fp_round (fp_extend x)) -> x if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) @@ -6549,16 +6673,16 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // This is a value preserving truncation if both round's are. 
bool IsTrunc = N->getConstantOperandVal(1) == 1 && N0.getNode()->getConstantOperandVal(1) == 1; - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), DAG.getIntPtrConstant(IsTrunc)); } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { - SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT, + SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorkList(Tmp.getNode()); - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, Tmp, N0.getOperand(1)); } @@ -6574,7 +6698,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { // fold (fp_round_inreg c1fp) -> c1fp if (N0CFP && isTypeLegal(EVT)) { SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round); } return SDValue(); @@ -6592,7 +6716,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fp_extend c1fp) -> c1fp if (N0CFP) - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. @@ -6601,25 +6725,23 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue In = N0.getOperand(0); if (In.getValueType() == VT) return In; if (VT.bitsLT(In.getValueType())) - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, In, N0.getOperand(1)); - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); } // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), + DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
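The fp_round/fp_extend hunks above lean on the extension being exact:
fp_round(fp_extend x) is the identity, which also justifies rewriting
fpext(load) as an extending load whose result is rounded back for the other
users of the original load. A standalone editorial sketch (plain ISO C++,
assuming IEEE-754 float/double, not part of the patch):

#include <cassert>

int main() {
  const float vals[] = {0.0f, -0.0f, 1.5f, 3.1415927f, 1e-38f, 3.4e38f};
  for (float f : vals) {
    double widened = static_cast<double>(f);   // fp_extend is exact
    assert(static_cast<float>(widened) == f);  // fp_round(fp_extend x) == x
  }
  return 0;
}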
@@ -6650,10 +6772,10 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { - Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, + Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } } @@ -6661,12 +6783,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // (fneg (fmul c, x)) -> (fmul -c, x) if (N0.getOpcode() == ISD::FMUL) { ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - if (CFP1) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + if (CFP1) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); - } } return SDValue(); @@ -6679,7 +6800,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { // fold (fceil c1) -> fceil(c1) if (N0CFP) - return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); } @@ -6691,7 +6812,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { // fold (ftrunc c1) -> ftrunc(c1) if (N0CFP) - return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); return SDValue(); } @@ -6703,7 +6824,7 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) { // fold (ffloor c1) -> ffloor(c1) if (N0CFP) - return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); } @@ -6720,28 +6841,28 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // fold (fabs c1) -> fabs(c1) if (N0CFP) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) return N->getOperand(0); // fold (fabs (fneg x)) -> (fabs x) // fold (fabs (fcopysign x, y)) -> (fabs x) if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. 
- if (!TLI.isFAbsFree(VT) && + if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { - Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, + Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } } @@ -6765,7 +6886,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (N1.getOpcode() == ISD::SETCC && TLI.isOperationLegalOrCustom(ISD::BR_CC, N1.getOperand(0).getValueType())) { - return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain, N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2); } @@ -6811,12 +6932,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (AndConst.isPowerOf2() && cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { SDValue SetCC = - DAG.getSetCC(N->getDebugLoc(), - TLI.getSetCCResultType(Op0.getValueType()), + DAG.getSetCC(SDLoc(N), + getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, Op0.getValueType()), ISD::SETNE); - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); // Don't add the new BRCond into the worklist or else SimplifySelectCC // will convert it back to (X & C1) >> C2. @@ -6861,7 +6982,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { DAG.ReplaceAllUsesOfValueWith(N1, Tmp); removeFromWorkList(TheXor); DAG.DeleteNode(TheXor); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, Tmp, N2); } @@ -6882,8 +7003,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { EVT SetCCVT = N1.getValueType(); if (LegalTypes) - SetCCVT = TLI.getSetCCResultType(SetCCVT); - SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), + SetCCVT = getSetCCResultType(SetCCVT); + SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, Equal ? ISD::SETEQ : ISD::SETNE); @@ -6892,7 +7013,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { DAG.ReplaceAllUsesOfValueWith(N1, SetCC); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); } } @@ -6913,14 +7034,14 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { // MachineBasicBlock CFG, which is awkward. // Use SimplifySetCC to simplify SETCC's. 
- SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), - CondLHS, CondRHS, CC->get(), N->getDebugLoc(), + SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), SDLoc(N), false); if (Simp.getNode()) AddToWorkList(Simp.getNode()); // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) - return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, N->getOperand(0), Simp.getOperand(2), Simp.getOperand(0), Simp.getOperand(1), N->getOperand(4)); @@ -7118,10 +7239,10 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDValue Result; if (isLoad) - Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); else - Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; @@ -7156,7 +7277,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // x0 * offset0 + y0 * ptr0 = t0 // knowing that // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) - // + // // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the // indexed load/store and the expression that needs to be re-written. // @@ -7186,7 +7307,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, - OtherUses[i]->getDebugLoc(), + SDLoc(OtherUses[i]), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); removeFromWorkList(OtherUses[i]); @@ -7278,7 +7399,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -7295,9 +7416,9 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Check for #2 if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) { SDValue Result = isLoad - ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM) - : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; @@ -7403,17 +7524,20 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getMemOperand()->getBaseAlignment()) { SDValue NewLoad = - DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + DAG.getExtLoad(LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), Align); + LD->isVolatile(), LD->isNonTemporal(), Align, + LD->getTBAAInfo()); return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } - if (CombinerAA) { + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain);
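The new UseAA computation changes how the CombinerAA flag is read: an explicit occurrence on the command line still wins, but when the flag is left at its default the subtarget's useAA() preference now decides. A minimal sketch of the cl::opt idiom involved; subtargetPrefersAA() is a stand-in for the real TargetSubtargetInfo::useAA() query:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    static cl::opt<bool> AAFlag("combiner-alias-analysis", cl::Hidden,
        cl::desc("Enable alias analysis in the DAG combiner"));

    static bool subtargetPrefersAA() { return false; } // stand-in

    static bool useAA() {
      // getNumOccurrences() distinguishes "defaulted to false" from an
      // explicit -combiner-alias-analysis=false on the command line.
      if (AAFlag.getNumOccurrences() > 0)
        return AAFlag;
      return subtargetPrefersAA();
    }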
@@ -7423,22 +7547,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Replace the chain to avoid dependency. if (LD->getExtensionType() == ISD::NON_EXTLOAD) { - ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), - BetterChain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD), + BetterChain, Ptr, LD->getMemOperand()); } else { - ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), LD->getValueType(0), - BetterChain, Ptr, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), - LD->isNonTemporal(), - LD->getAlignment()); + BetterChain, Ptr, LD->getMemoryVT(), + LD->getMemOperand()); } // Create token factor to keep old chain connected. - SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplLoad.getValue(1)); // Make sure the new and old chains are cleaned up. @@ -7454,9 +7573,562 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); + // Try to slice up N to more direct loads if the slices are mapped to + // different register banks or pairing can take place. + if (SliceUpLoad(N)) + return SDValue(N, 0); + return SDValue(); } +namespace { +/// \brief Helper structure used to slice a load into smaller loads. +/// Basically a slice is obtained from the following sequence: +/// Origin = load Ty1, Base +/// Shift = srl Ty1 Origin, CstTy Amount +/// Inst = trunc Shift to Ty2 +/// +/// Then, it will be rewritten into: +/// Slice = load SliceTy, Base + SliceOffset +/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 +/// +/// SliceTy is deduced from the number of bits that are actually used to +/// build Inst. +struct LoadedSlice { + /// \brief Helper structure used to compute the cost of a slice. + struct Cost { + /// Are we optimizing for code size. + bool ForCodeSize; + /// Various costs. + unsigned Loads; + unsigned Truncates; + unsigned CrossRegisterBanksCopies; + unsigned ZExts; + unsigned Shift; + + Cost(bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(0), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {} + + /// \brief Get the cost of one isolated slice. + Cost(const LoadedSlice &LS, bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(1), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) { + EVT TruncType = LS.Inst->getValueType(0); + EVT LoadedType = LS.getLoadedType(); + if (TruncType != LoadedType && + !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) + ZExts = 1; + } + + /// \brief Account for slicing gain in the current cost. + /// Slicing provides a few gains, like removing a shift or a + /// truncate. This method grows the cost of the original + /// load by the gain from this slice. + void addSliceGain(const LoadedSlice &LS) { + // Each slice saves a truncate. + const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); + if (!TLI.isTruncateFree(LS.Inst->getValueType(0), + LS.Inst->getOperand(0).getValueType())) + ++Truncates; + // If there is a shift amount, this slice gets rid of it. + if (LS.Shift) + ++Shift; + // If this slice can merge a cross register bank copy, account for it. + if (LS.canMergeExpensiveCrossRegisterBankCopy()) + ++CrossRegisterBanksCopies; + }
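The sequence named in the LoadedSlice comment above corresponds, at the source level, to one wide load whose pieces are peeled off with shifts and truncates; slicing rewrites it into independent narrow loads. A rough before/after sketch, assuming a little-endian target and illustrative function names:

    #include <cstdint>
    #include <cstring>

    // Before: Origin = load i64; one trunc and one trunc(srl 32).
    void before(const unsigned char *P, uint32_t &Lo, uint32_t &Hi) {
      uint64_t Wide;
      std::memcpy(&Wide, P, 8);     // Origin
      Lo = uint32_t(Wide);          // Inst = trunc
      Hi = uint32_t(Wide >> 32);    // Shift = srl 32, then trunc
    }

    // After: each slice is loaded directly at Base + SliceOffset.
    void after(const unsigned char *P, uint32_t &Lo, uint32_t &Hi) {
      std::memcpy(&Lo, P, 4);       // slice at offset 0
      std::memcpy(&Hi, P + 4, 4);   // slice at offset Shift / 8 == 4
    }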
+ + Cost &operator+=(const Cost &RHS) { + Loads += RHS.Loads; + Truncates += RHS.Truncates; + CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; + ZExts += RHS.ZExts; + Shift += RHS.Shift; + return *this; + } + + bool operator==(const Cost &RHS) const { + return Loads == RHS.Loads && Truncates == RHS.Truncates && + CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && + ZExts == RHS.ZExts && Shift == RHS.Shift; + } + + bool operator!=(const Cost &RHS) const { return !(*this == RHS); } + + bool operator<(const Cost &RHS) const { + // Assume cross register banks copies are as expensive as loads. + // FIXME: Do we want some more target hooks? + unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; + unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; + // Unless we are optimizing for code size, consider the + // expensive operation first. + if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS) + return ExpensiveOpsLHS < ExpensiveOpsRHS; + return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < + (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); + } + + bool operator>(const Cost &RHS) const { return RHS < *this; } + + bool operator<=(const Cost &RHS) const { return !(RHS < *this); } + + bool operator>=(const Cost &RHS) const { return !(*this < RHS); } + }; + // The last instruction that represents the slice. This should be a + // truncate instruction. + SDNode *Inst; + // The original load instruction. + LoadSDNode *Origin; + // The right shift amount in bits from the original load. + unsigned Shift; + // The DAG from which Origin came. + // This is used to get some contextual information about legal types, etc. + SelectionDAG *DAG; + + LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL, + unsigned Shift = 0, SelectionDAG *DAG = NULL) + : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} + + LoadedSlice(const LoadedSlice &LS) + : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} + + /// \brief Get the bits used in a chunk of bits \p BitWidth large. + /// \return Result is \p BitWidth bits wide and has used bits set to 1 and + /// unused bits set to 0. + APInt getUsedBits() const { + // Reproduce the trunc(lshr) sequence: + // - Start from the truncated value. + // - Zero extend to the desired bit width. + // - Shift left. + assert(Origin && "No original load to compare against."); + unsigned BitWidth = Origin->getValueSizeInBits(0); + assert(Inst && "This slice is not bound to an instruction"); + assert(Inst->getValueSizeInBits(0) <= BitWidth && + "Extracted slice is bigger than the whole type!"); + APInt UsedBits(Inst->getValueSizeInBits(0), 0); + UsedBits.setAllBits(); + UsedBits = UsedBits.zext(BitWidth); + UsedBits <<= Shift; + return UsedBits; + } + + /// \brief Get the size of the slice to be loaded in bytes. + unsigned getLoadedSize() const { + unsigned SliceSize = getUsedBits().countPopulation(); + assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); + return SliceSize / 8; + } + + /// \brief Get the type that will be loaded for this slice. + /// Note: This may not be the final type for the slice. + EVT getLoadedType() const { + assert(DAG && "Missing context"); + LLVMContext &Ctxt = *DAG->getContext(); + return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); + }
+ /// \brief Get the alignment of the load used for this slice. + unsigned getAlignment() const { + unsigned Alignment = Origin->getAlignment(); + unsigned Offset = getOffsetFromBase(); + if (Offset != 0) + Alignment = MinAlign(Alignment, Alignment + Offset); + return Alignment; + } + + /// \brief Check if this slice can be rewritten with legal operations. + bool isLegal() const { + // An invalid slice is not legal. + if (!Origin || !Inst || !DAG) + return false; + + // Offsets are for indexed loads only; we do not handle that. + if (Origin->getOffset().getOpcode() != ISD::UNDEF) + return false; + + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + + // Check that the type is legal. + EVT SliceType = getLoadedType(); + if (!TLI.isTypeLegal(SliceType)) + return false; + + // Check that the load is legal for this type. + if (!TLI.isOperationLegal(ISD::LOAD, SliceType)) + return false; + + // Check that the offset can be computed. + // 1. Check its type. + EVT PtrType = Origin->getBasePtr().getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + return false; + + // 2. Check that it fits in the immediate. + if (!TLI.isLegalAddImmediate(getOffsetFromBase())) + return false; + + // 3. Check that the computation is legal. + if (!TLI.isOperationLegal(ISD::ADD, PtrType)) + return false; + + // Check that the zext is legal if it needs one. + EVT TruncateType = Inst->getValueType(0); + if (TruncateType != SliceType && + !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) + return false; + + return true; + } + + /// \brief Get the offset in bytes of this slice in the original chunk of + /// bits. + /// \pre DAG != NULL. + uint64_t getOffsetFromBase() const { + assert(DAG && "Missing context."); + bool IsBigEndian = + DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian(); + assert(!(Shift & 0x7) && "Shifts not aligned on bytes are not supported."); + uint64_t Offset = Shift / 8; + unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; + assert(!(Origin->getValueSizeInBits(0) & 0x7) && + "The size of the original loaded type is not a multiple of a" + " byte."); + // If Offset is bigger than TySizeInBytes, it means we are loading all + // zeros. This should have been optimized before in the process. + assert(TySizeInBytes > Offset && + "Invalid shift amount for given loaded size"); + if (IsBigEndian) + Offset = TySizeInBytes - Offset - getLoadedSize(); + return Offset; + }
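getOffsetFromBase above is where endianness enters the picture: the shift amount counts bits from the least significant end, so on a big-endian target the byte offset has to be mirrored within the wide type. The same computation as a standalone sketch (parameter names are mine):

    #include <cassert>
    #include <cstdint>

    uint64_t sliceByteOffset(unsigned ShiftBits, unsigned WideSizeBytes,
                             unsigned SliceSizeBytes, bool IsBigEndian) {
      assert(!(ShiftBits & 0x7) && "shift must be byte aligned");
      uint64_t Offset = ShiftBits / 8;
      assert(WideSizeBytes > Offset && "slice shifted entirely out");
      // Little endian: the LSBs sit at the base address, so the shift is
      // the offset. Big endian: the LSBs sit at the end, so mirror it.
      if (IsBigEndian)
        Offset = WideSizeBytes - Offset - SliceSizeBytes;
      return Offset;
    }

For an 8-byte load sliced by trunc(lshr 32) into 4 bytes, this yields offset 4 on a little-endian target and offset 0 on a big-endian one.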
+ /// \brief Generate the sequence of instructions to load the slice + /// represented by this object and redirect the uses of this slice to + /// this new sequence of instructions. + /// \pre this->Inst && this->Origin are valid Instructions and this + /// object passed the legal check: LoadedSlice::isLegal returned true. + /// \return The last instruction of the sequence used to load the slice. + SDValue loadSlice() const { + assert(Inst && Origin && "Unable to replace a non-existing slice."); + const SDValue &OldBaseAddr = Origin->getBasePtr(); + SDValue BaseAddr = OldBaseAddr; + // Get the offset in that chunk of bytes w.r.t. the endianness. + int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); + assert(Offset >= 0 && "Offset too big to fit in int64_t!"); + if (Offset) { + // BaseAddr = BaseAddr + Offset. + EVT ArithType = BaseAddr.getValueType(); + BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr, + DAG->getConstant(Offset, ArithType)); + } + + // Create the type of the loaded slice according to its size. + EVT SliceType = getLoadedType(); + + // Create the load for the slice. + SDValue LastInst = DAG->getLoad( + SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, + Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(), + Origin->isNonTemporal(), Origin->isInvariant(), getAlignment()); + // If the final type is not the same as the loaded type, this means that + // we have to pad with zero. Create a zero extend for that. + EVT FinalType = Inst->getValueType(0); + if (SliceType != FinalType) + LastInst = + DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); + return LastInst; + } + + /// \brief Check if this slice can be merged with an expensive cross register + /// bank copy. E.g., + /// i = load i32 + /// f = bitcast i32 i to float + bool canMergeExpensiveCrossRegisterBankCopy() const { + if (!Inst || !Inst->hasOneUse()) + return false; + SDNode *Use = *Inst->use_begin(); + if (Use->getOpcode() != ISD::BITCAST) + return false; + assert(DAG && "Missing context"); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + EVT ResVT = Use->getValueType(0); + const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); + const TargetRegisterClass *ArgRC = + TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); + if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // At this point, we know that we perform a cross-register-bank copy. + // Check if it is expensive. + const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo(); + // Assume bitcasts are cheap, unless the two register classes do not + // explicitly share a common subclass. + if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) + return false; + + // Check if it will be merged with the load. + // 1. Check the alignment constraint. + unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment( + ResVT.getTypeForEVT(*DAG->getContext())); + + if (RequiredAlignment > getAlignment()) + return false; + + // 2. Check that the load is a legal operation for that type. + if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // 3. Check that we do not have a zext in the way. + if (Inst->getValueType(0) != getLoadedType()) + return false; + + return true; + } +}; +} + +/// \brief Sorts LoadedSlices according to their offset. +struct LoadedSliceSorter { + bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) { + assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); + return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); + } +}; + +/// \brief Check that all bits set in \p UsedBits form a dense region, i.e., +/// \p UsedBits looks like 0..0 1..1 0..0. +static bool areUsedBitsDense(const APInt &UsedBits) { + // If all the bits are one, this is dense! + if (UsedBits.isAllOnesValue()) + return true; + + // Get rid of the unused bits on the right. + APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros()); + // Get rid of the unused bits on the left. + if (NarrowedUsedBits.countLeadingZeros()) + NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); + // Check that the chunk of bits is completely used. + return NarrowedUsedBits.isAllOnesValue(); +}
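areUsedBitsDense is a contiguity test on an APInt mask. On a plain 64-bit mask the same check is two lines, using the usual power-of-two trick; a sketch, not the patch's code:

    #include <cstdint>

    bool isDense(uint64_t UsedBits) {
      if (UsedBits == 0)
        return false;                             // empty mask: no dense run
      UsedBits >>= __builtin_ctzll(UsedBits);     // drop trailing zeros
      return (UsedBits & (UsedBits + 1)) == 0;    // remaining bits all ones?
    }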
+/// \brief Check whether or not \p First and \p Second are next to each other +/// in memory. This means that there is no hole between the bits loaded +/// by \p First and the bits loaded by \p Second. +static bool areSlicesNextToEachOther(const LoadedSlice &First, + const LoadedSlice &Second) { + assert(First.Origin == Second.Origin && First.Origin && + "Unable to match different memory origins."); + APInt UsedBits = First.getUsedBits(); + assert((UsedBits & Second.getUsedBits()) == 0 && + "Slices are not supposed to overlap."); + UsedBits |= Second.getUsedBits(); + return areUsedBitsDense(UsedBits); +} + +/// \brief Adjust the \p GlobalLSCost according to the target +/// pairing capabilities and the layout of the slices. +/// \pre \p GlobalLSCost should account for at least as many loads as +/// there are in the slices in \p LoadedSlices. +static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, + LoadedSlice::Cost &GlobalLSCost) { + unsigned NumberOfSlices = LoadedSlices.size(); + // If there are fewer than 2 elements, no pairing is possible. + if (NumberOfSlices < 2) + return; + + // Sort the slices so that elements that are likely to be next to each + // other in memory are next to each other in the list. + std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter()); + const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); + // First (resp. Second) is the first (resp. second) potential candidate + // to be placed in a paired load. + const LoadedSlice *First = NULL; + const LoadedSlice *Second = NULL; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, + // Set the beginning of the pair. + First = Second) { + + Second = &LoadedSlices[CurrSlice]; + + // If First is NULL, it means we start a new pair. + // Get to the next slice. + if (!First) + continue; + + EVT LoadedType = First->getLoadedType(); + + // If the types of the slices are different, we cannot pair them. + if (LoadedType != Second->getLoadedType()) + continue; + + // Check if the target supplies paired loads for this type. + unsigned RequiredAlignment = 0; + if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { + // Move to the next pair; this type is hopeless. + Second = NULL; + continue; + } + // Check if we meet the alignment requirement. + if (RequiredAlignment > First->getAlignment()) + continue; + + // Check that both loads are next to each other in memory. + if (!areSlicesNextToEachOther(*First, *Second)) + continue; + + assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); + --GlobalLSCost.Loads; + // Move to the next pair. + Second = NULL; + } +}
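The sweep above sorts the slices by offset and retires one load from the global cost for every adjacent pair of same-type, sufficiently aligned slices. Stripped of the DAG types, the control flow looks like this; a simplified model in which any two contiguous equal-size slices may pair:

    #include <algorithm>
    #include <vector>

    struct Slice { uint64_t Offset; unsigned SizeBytes; };

    unsigned loadsSavedByPairing(std::vector<Slice> &Slices) {
      if (Slices.size() < 2)
        return 0;
      std::sort(Slices.begin(), Slices.end(),
                [](const Slice &L, const Slice &R) { return L.Offset < R.Offset; });
      unsigned Saved = 0;
      const Slice *First = nullptr;
      for (const Slice &Second : Slices) {
        if (First && First->SizeBytes == Second.SizeBytes &&
            First->Offset + First->SizeBytes == Second.Offset) {
          ++Saved;          // these two fold into one paired load
          First = nullptr;  // a slice joins at most one pair
          continue;
        }
        First = &Second;
      }
      return Saved;
    }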
+/// \brief Check the profitability of all involved LoadedSlice. +/// Currently, it is considered profitable if there are exactly two +/// involved slices (1) which are (2) next to each other in memory, and +/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). +/// +/// Note: The order of the elements in \p LoadedSlices may be modified, but not +/// the elements themselves. +/// +/// FIXME: When the cost model is mature enough, we can relax +/// constraints (1) and (2). +static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, + const APInt &UsedBits, bool ForCodeSize) { + unsigned NumberOfSlices = LoadedSlices.size(); + if (StressLoadSlicing) + return NumberOfSlices > 1; + + // Check (1). + if (NumberOfSlices != 2) + return false; + + // Check (2). + if (!areUsedBitsDense(UsedBits)) + return false; + + // Check (3). + LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); + // The original code has one big load. + OrigCost.Loads = 1; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { + const LoadedSlice &LS = LoadedSlices[CurrSlice]; + // Accumulate the cost of all the slices. + LoadedSlice::Cost SliceCost(LS, ForCodeSize); + GlobalSlicingCost += SliceCost; + + // Account as cost in the original configuration the gain obtained + // with the current slices. + OrigCost.addSliceGain(LS); + } + + // If the target supports paired loads, adjust the cost accordingly. + adjustCostForPairing(LoadedSlices, GlobalSlicingCost); + return OrigCost > GlobalSlicingCost; +} + +/// \brief If the given load, \p N, is used only by trunc or trunc(lshr) +/// operations, split it into the various pieces being extracted. +/// +/// This sort of thing is introduced by SROA. +/// This slicing takes care not to insert overlapping loads. +/// \pre \p N is a simple load (i.e., not an atomic or volatile load). +bool DAGCombiner::SliceUpLoad(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + LoadSDNode *LD = cast<LoadSDNode>(N); + if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + !LD->getValueType(0).isInteger()) + return false; + + // Keep track of already used bits to detect overlapping values. + // In that case, we will just abort the transformation. + APInt UsedBits(LD->getValueSizeInBits(0), 0); + + SmallVector<LoadedSlice, 4> LoadedSlices; + + // Check if this load is used as several smaller chunks of bits. + // Basically, look for uses in trunc or trunc(lshr) and record a new chain + // of computation for each trunc. + for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); + UI != UIEnd; ++UI) { + // Skip the uses of the chain. + if (UI.getUse().getResNo() != 0) + continue; + + SDNode *User = *UI; + unsigned Shift = 0; + + // Check if this is a trunc(lshr). + if (User->getOpcode() == ISD::SRL && User->hasOneUse() && + isa<ConstantSDNode>(User->getOperand(1))) { + Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); + User = *User->use_begin(); + } + + // At this point, User must be a truncate, whether we matched a plain + // trunc or a trunc(lshr). + if (User->getOpcode() != ISD::TRUNCATE) + return false; + + // The width of the type must be a power of 2 and at least 8 bits. + // Otherwise the load cannot be represented in LLVM IR. + // Moreover, if we shifted by an amount that is not a multiple of 8 + // bits, the slice would cross byte boundaries. We do not support that. + unsigned Width = User->getValueSizeInBits(0); + if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) + return false; + + // Build the slice for this chain of computations. + LoadedSlice LS(User, LD, Shift, &DAG); + APInt CurrentUsedBits = LS.getUsedBits(); + + // Check if this slice overlaps with another. + if ((CurrentUsedBits & UsedBits) != 0) + return false; + // Update the bits used globally. + UsedBits |= CurrentUsedBits; + + // Check if the new slice would be legal. + if (!LS.isLegal()) + return false; + + // Record the slice. + LoadedSlices.push_back(LS); + } + + // Abort slicing if it does not seem to be profitable. + if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) + return false; + + ++SlicedLoads;
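The use-scan above guarantees that the recorded slices never overlap: each slice's getUsedBits() is accumulated into a running mask, and any reuse of a bit aborts the whole transformation. The bookkeeping with plain 64-bit arithmetic instead of APInt (a sketch; real loads may be wider than 64 bits):

    #include <cassert>
    #include <cstdint>

    bool claimSlice(uint64_t &UsedBits, unsigned ShiftBits, unsigned WidthBits) {
      assert(ShiftBits + WidthBits <= 64 && "slice out of range");
      // WidthBits ones shifted left by ShiftBits, as getUsedBits builds.
      uint64_t Ones = WidthBits >= 64 ? ~UINT64_C(0)
                                      : (UINT64_C(1) << WidthBits) - 1;
      uint64_t Mask = Ones << ShiftBits;
      if (UsedBits & Mask)
        return false;   // overlapping slice: abort the transformation
      UsedBits |= Mask;
      return true;
    }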
+ + // Rewrite each chain to use an independent load. + // By construction, each chain can be represented by a unique load. + + // Prepare the argument for the new token factor for all the slices. + SmallVector<SDValue, 8> ArgChains; + for (SmallVectorImpl<LoadedSlice>::const_iterator + LSIt = LoadedSlices.begin(), + LSItEnd = LoadedSlices.end(); + LSIt != LSItEnd; ++LSIt) { + SDValue SliceInst = LSIt->loadSlice(); + CombineTo(LSIt->Inst, SliceInst, true); + if (SliceInst.getNode()->getOpcode() != ISD::LOAD) + SliceInst = SliceInst.getOperand(0); + assert(SliceInst->getOpcode() == ISD::LOAD && + "It takes more than a zext to get to the loaded slice!!"); + ArgChains.push_back(SliceInst.getValue(1)); + } + + SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, + &ArgChains[0], ArgChains.size()); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); + return true; +} + /// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the /// load has specific bytes cleared out. If so, return the byte size /// being masked out and the shift amount. @@ -7500,9 +8172,9 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { // 0 and the bits being kept are 1. Use getSExtValue so that leading bits // follow the sign bit for uniformity. uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); - unsigned NotMaskLZ = CountLeadingZeros_64(NotMask); + unsigned NotMaskLZ = countLeadingZeros(NotMask); if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. - unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); + unsigned NotMaskTZ = countTrailingZeros(NotMask); if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. if (NotMaskLZ == 64) return Result; // All zero mask.
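The countLeadingZeros/countTrailingZeros checks above are asking one question: do the cleared bits of the AND mask form a single byte-aligned run? Folded into one self-contained predicate (a hypothetical helper that also includes the contiguity test the caller performs later):

    #include <cstdint>

    bool clearsOneByteAlignedRange(uint64_t Mask) {
      uint64_t NotMask = ~Mask;
      if (NotMask == 0)
        return false;                    // nothing is cleared
      if (__builtin_clzll(NotMask) & 7)
        return false;                    // leading edge not on a byte
      if (__builtin_ctzll(NotMask) & 7)
        return false;                    // trailing edge not on a byte
      // The cleared bits must form one contiguous run.
      NotMask >>= __builtin_ctzll(NotMask);
      return (NotMask & (NotMask + 1)) == 0;
    }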
@@ -7559,7 +8231,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. if (ByteShift) - IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal, + IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal, DAG.getConstant(ByteShift*8, DC->getShiftAmountTy(IVal.getValueType()))); @@ -7574,16 +8246,16 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue Ptr = St->getBasePtr(); if (StOffset) { - Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), + Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(), Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); NewAlign = MinAlign(NewAlign, StOffset); } // Truncate down to the new size. - IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); + IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); ++OpsNarrowed; - return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, + return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr, St->getPointerInfo().getWithOffset(StOffset), false, false, NewAlign).getNode(); } @@ -7684,17 +8356,18 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy)) return SDValue(); - SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), + SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), Ptr.getValueType(), Ptr, DAG.getConstant(PtrOff, Ptr.getValueType())); - SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), + SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), NewAlign); - SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, + LD->isInvariant(), NewAlign, + LD->getTBAAInfo()); + SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); - SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), + SDValue NewST = DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, ST->getPointerInfo().getWithOffset(PtrOff), false, false, NewAlign); @@ -7747,12 +8420,12 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); - SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), + SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), false, false, false, LDAlign); - SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), + SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(), ST->getPointerInfo(), false, false, STAlign); @@ -7802,17 +8475,28 @@ struct BaseIndexOffset { static BaseIndexOffset match(SDValue Ptr) { bool IsIndexSignExt = false; - // Just Base or possibly anything else. + // We can only pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD + // instruction, then it could be just the BASE or anything else we don't + // know how to handle. Just use Ptr as BASE and give up. if (Ptr->getOpcode() != ISD::ADD) return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - // Base + offset. + // We know that we have at least an ADD instruction. Try to pattern match + // the simple case of BASE + OFFSET. if (isa<ConstantSDNode>(Ptr->getOperand(1))) { int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, IsIndexSignExt); } + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Ptr is the actual BASE pointer. + // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + // Look at Base + Index + Offset cases. SDValue Base = Ptr->getOperand(0); SDValue IndexOffset = Ptr->getOperand(1); @@ -7963,6 +8647,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { Index = STn; break; } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { + if (Ldn->isVolatile()) { + Index = NULL; + break; + } + // Save the load node for later. Continue the scan.
AliasLoadNodes.push_back(Ldn); NextInChain = Ldn->getChain().getNode(); @@ -8080,7 +8769,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // The earliest Node in the DAG. LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; - DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc(); + SDLoc DL(StoreNodes[0].MemNode); SDValue StoredVal; if (UseVector) { @@ -8276,8 +8965,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); } - DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc(); - DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc(); + SDLoc LoadDL(LoadNodes[0].MemNode); + SDLoc StoreDL(StoreNodes[0].MemNode); LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, @@ -8338,9 +9027,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) - return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), + return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), OrigAlign); + ST->isNonTemporal(), OrigAlign, + ST->getTBAAInfo()); } // Turn 'store undef, Ptr' -> nothing. @@ -8355,7 +9045,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // transform should not be done in this case. if (Value.getOpcode() != ISD::TargetConstantFP) { SDValue Tmp; - switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { + switch (CFP->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unknown FP type"); case MVT::f16: // We don't do this for these yet. case MVT::f80: @@ -8367,9 +9057,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue(), MVT::i32); - return DAG.getStore(Chain, N->getDebugLoc(), Tmp, - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + return DAG.getStore(Chain, SDLoc(N), Tmp, + Ptr, ST->getMemOperand()); } break; case MVT::f64: @@ -8378,9 +9067,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
getZExtValue(), MVT::i64); - return DAG.getStore(Chain, N->getDebugLoc(), Tmp, - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + return DAG.getStore(Chain, SDLoc(N), Tmp, + Ptr, ST->getMemOperand()); } if (!ST->isVolatile() && @@ -8396,19 +9084,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); - SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, + SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, - ST->getAlignment()); - Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, + ST->getAlignment(), TBAAInfo); + Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); - SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, + SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, - Alignment); - return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + Alignment, TBAAInfo); + return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, St0, St1); } @@ -8421,9 +9110,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) - return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, + return DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), Align); + ST->isVolatile(), ST->isNonTemporal(), Align, + ST->getTBAAInfo()); } } @@ -8433,7 +9123,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (NewST.getNode()) return NewST; - if (CombinerAA) { + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -8443,19 +9135,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { - ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, - ST->getPointerInfo(), - ST->getMemoryVT(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr, + ST->getMemoryVT(), ST->getMemOperand()); } else { - ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, - ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr, + ST->getMemOperand()); } // Create token to keep both nodes around. - SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplStore); // Make sure the new and old chains are cleaned up. 
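The MVT::f64 case above bitcasts the constant and, when a 64-bit integer store is not legal, emits two i32 stores at Ptr and Ptr+4, reducing the second store's alignment with MinAlign. A source-level equivalent, assuming a little-endian target:

    #include <cstdint>
    #include <cstring>

    void storeF64AsTwoI32(double V, unsigned char *P) {
      uint64_t Bits;
      std::memcpy(&Bits, &V, 8);           // bitcastToAPInt()
      uint32_t Lo = uint32_t(Bits);        // St0, stored at P
      uint32_t Hi = uint32_t(Bits >> 32);  // St1, stored at P + 4
      std::memcpy(P, &Lo, 4);
      std::memcpy(P + 4, &Hi, 4);
    }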
@@ -8483,10 +9171,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->getMemoryVT().getScalarType().getSizeInBits())); AddToWorkList(Value.getNode()); if (Shorter.getNode()) - return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + return DAG.getTruncStore(Chain, SDLoc(N), Shorter, + Ptr, ST->getMemoryVT(), ST->getMemOperand()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. @@ -8516,10 +9202,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { && Value.getNode()->hasOneUse() && ST->isUnindexed() && TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), ST->getMemoryVT())) { - return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), + Ptr, ST->getMemoryVT(), ST->getMemOperand()); } // Only perform this optimization before the types are legal, because we @@ -8547,7 +9231,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); SDValue EltNo = N->getOperand(2); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If the inserted element is an UNDEF, just use the input vector. if (InVal.getOpcode() == ISD::UNDEF) @@ -8568,7 +9252,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. SmallVector<SDValue, 8> Ops; - if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + // Do not combine these two vectors if the output vector will not replace + // the input vector. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { Ops.append(InVec.getNode()->op_begin(), InVec.getNode()->op_end()); } else if (InVec.getOpcode() == ISD::UNDEF) { @@ -8608,7 +9294,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue InOp = InVec.getOperand(0); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); - return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); + return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); } return InOp; } @@ -8641,8 +9327,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { OrigElt -= NumElem; } - EVT IndexTy = N->getOperand(1).getValueType(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, + EVT IndexTy = TLI.getVectorIdxTy(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, InVec, DAG.getConstant(OrigElt, IndexTy)); } @@ -8756,7 +9442,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { EVT PtrType = NewPtr.getValueType(); if (TLI.isBigEndian()) PtrOff = VT.getSizeInBits() / 8 - PtrOff; - NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, + NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr, DAG.getConstant(PtrOff, PtrType)); } @@ -8773,20 +9459,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // extending load instead. ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) ? 
ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), + Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + LVT, LN0->isVolatile(), LN0->isNonTemporal(), + Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); } else { - Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); if (NVT.bitsLT(LVT)) - Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load); + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); else - Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load); + Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); } WorkListRemover DeadNodes(*this); SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; @@ -8816,7 +9503,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return SDValue(); unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of values @@ -8918,7 +9605,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT SrcVT = MVT::Other; unsigned Opcode = ISD::DELETED_NODE; @@ -8983,7 +9670,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); // A vector built entirely of undefs is undef. @@ -9119,8 +9806,35 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return N->getOperand(0); // Check if all of the operands are undefs. + EVT VT = N->getValueType(0); if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(N->getValueType(0)); + return DAG.getUNDEF(VT); + + // Optimize concat_vectors where one of the vectors is undef. + if (N->getNumOperands() == 2 && + N->getOperand(1)->getOpcode() == ISD::UNDEF) { + SDValue In = N->getOperand(0); + assert(In.getValueType().isVector() && "Must concat vectors"); + + // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). + if (In->getOpcode() == ISD::BITCAST && + !In->getOperand(0)->getValueType(0).isVector()) { + SDValue Scalar = In->getOperand(0); + EVT SclTy = Scalar->getValueType(0); + + if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) + return SDValue(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, + VT.getSizeInBits() / SclTy.getSizeInBits()); + if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) + return SDValue(); + + SDLoc dl = SDLoc(N); + SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); + return DAG.getNode(ISD::BITCAST, dl, VT, Res); + } + } // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. @@ -9158,7 +9872,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // The extract index must be constant. if (!CS) return SDValue(); - + // Check that we are reading from the identity index. 
if (CS->getZExtValue() != IdentityIndex) return SDValue(); @@ -9166,7 +9880,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SingleSource.getNode()) return SingleSource; - + return SDValue(); } @@ -9179,7 +9893,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // (extract_subvec (concat V1, V2, ...), i) // Into: // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same type. + // Only operand 0 is checked as 'concat' assumes all inputs of the same + // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); @@ -9194,7 +9909,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { V = V.getOperand(0); if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Handle only simple case where vector being inserted and vector // being extracted are of same type, and are half size of larger vectors. EVT BigVT = V->getOperand(0).getValueType(); @@ -9246,22 +9961,36 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { for (unsigned I = 0; I != NumConcats; ++I) { // Make sure we're dealing with a copy. unsigned Begin = I * NumElemsPerConcat; - if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) - return SDValue(); + bool AllUndef = true, NoUndef = true; + for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { + if (SVN->getMaskElt(J) >= 0) + AllUndef = false; + else + NoUndef = false; + } - for (unsigned J = 1; J != NumElemsPerConcat; ++J) { - if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + if (NoUndef) { + if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) return SDValue(); - } - unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; - if (FirstElt < N0.getNumOperands()) - Ops.push_back(N0.getOperand(FirstElt)); - else - Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + for (unsigned J = 1; J != NumElemsPerConcat; ++J) + if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + return SDValue(); + + unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; + if (FirstElt < N0.getNumOperands()) + Ops.push_back(N0.getOperand(FirstElt)); + else + Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + + } else if (AllUndef) { + Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); + } else { // Mixed with general masks and undefs, can't do optimization. 
+ return SDValue(); + } } - return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(), + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(), Ops.size()); } @@ -9288,7 +10017,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (Idx >= (int)NumElts) Idx -= NumElts; NewMask.push_back(Idx); } - return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT), + return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), &NewMask[0]); } @@ -9298,14 +10027,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); if (Idx >= 0) { - if (Idx < (int)NumElts) - Idx += NumElts; - else + if (Idx >= (int)NumElts) Idx -= NumElts; + else + Idx = -1; // remove reference to lhs } NewMask.push_back(Idx); } - return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), + return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT), &NewMask[0]); } @@ -9322,7 +10051,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { NewMask.push_back(Idx); } if (Changed) - return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]); } // If it is a splat, check if the argument vector is another splat or a @@ -9419,7 +10148,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (N->getOpcode() == ISD::AND) { @@ -9450,7 +10179,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT EltVT = RVT.getVectorElementType(); SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, &ZeroOps[0], ZeroOps.size()); LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); @@ -9506,13 +10235,13 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // legalization, the types may not match between the two BUILD_VECTORS. // Truncate one of the operands to make them match. 
if (RVT.getSizeInBits() > VT.getSizeInBits()) { - RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp); + RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp); } else { - LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp); + LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp); VT = RVT; } } - SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, + SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, LHSOp, RHSOp); if (FoldOp.getOpcode() != ISD::UNDEF && FoldOp.getOpcode() != ISD::Constant && @@ -9523,7 +10252,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } if (Ops.size() == LHS.getNumOperands()) - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), &Ops[0], Ops.size()); } @@ -9548,7 +10277,7 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { Op.getOpcode() != ISD::ConstantFP) break; EVT EltVT = Op.getValueType(); - SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op); + SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op); if (FoldOp.getOpcode() != ISD::UNDEF && FoldOp.getOpcode() != ISD::ConstantFP) break; @@ -9559,11 +10288,11 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { if (Ops.size() != N0.getNumOperands()) return SDValue(); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), &Ops[0], Ops.size()); } -SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, +SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2){ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); @@ -9577,13 +10306,13 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, // Check to see if we got a select_cc back (to turn into setcc/select). // Otherwise, just return whatever node we got back, like fabs. 
if (SCC.getOpcode() == ISD::SELECT_CC) { - SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(), + SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), N0.getValueType(), SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4)); AddToWorkList(SETCC.getNode()); - return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(), - SCC.getOperand(2), SCC.getOperand(3), SETCC); + return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), + SCC.getOperand(2), SCC.getOperand(3), SETCC); } return SCC; @@ -9652,10 +10381,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) return false; - Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), LLD->getBasePtr(), - RLD->getBasePtr()); + Addr = DAG.getSelect(SDLoc(TheSelect), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); } else { // Otherwise SELECT_CC SDNode *CondLHS = TheSelect->getOperand(0).getNode(); SDNode *CondRHS = TheSelect->getOperand(1).getNode(); @@ -9666,7 +10395,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) return false; - Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), TheSelect->getOperand(1), @@ -9677,17 +10406,17 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue Load; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { Load = DAG.getLoad(TheSelect->getValueType(0), - TheSelect->getDebugLoc(), - // FIXME: Discards pointer info. + SDLoc(TheSelect), + // FIXME: Discards pointer and TBAA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->isInvariant(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), - TheSelect->getDebugLoc(), + SDLoc(TheSelect), TheSelect->getValueType(0), - // FIXME: Discards pointer info. + // FIXME: Discards pointer and TBAA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->getMemoryVT(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->getAlignment()); @@ -9708,7 +10437,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, /// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. -SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, +SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare) { // (x ? y : y) -> y. 
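Several of the SimplifySelectCC folds that follow hinge on a single identity: an arithmetic right shift by width-1 smears the sign bit into an all-ones or all-zero mask, so a compare-against-zero select needs no branch. In C terms (relying on the usual arithmetic-shift behavior for signed right shifts):

    #include <cstdint>

    // "X < 0 ? A : 0" as (and (sra X, 31), A).
    uint32_t selectNegative(int32_t X, uint32_t A) {
      uint32_t Mask = uint32_t(X >> 31);  // 0x00000000 or 0xFFFFFFFF
      return Mask & A;
    }

The absolute-value fold near the end of the function builds on the same mask, combining it with an add and an xor.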
@@ -9720,7 +10449,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); // Determine if the condition we're dealing with is constant - SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), + SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, DL, false); if (SCC.getNode()) AddToWorkList(SCC.getNode()); ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode()); @@ -9786,13 +10515,13 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue One = DAG.getIntPtrConstant(EltSize); SDValue Cond = DAG.getSetCC(DL, - TLI.getSetCCResultType(N0.getValueType()), + getSetCCResultType(N0.getValueType()), N0, N1, CC); AddToWorkList(Cond.getNode()); - SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), - Cond, One, Zero); + SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), + Cond, One, Zero); AddToWorkList(CstOffset.getNode()); - CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, + CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorkList(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, @@ -9817,7 +10546,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, ShCtV = XType.getSizeInBits()-ShCtV-1; SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy(N0.getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), + SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, ShCt); AddToWorkList(Shift.getNode()); @@ -9829,7 +10558,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return DAG.getNode(ISD::AND, DL, AType, Shift, N2); } - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), + SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); @@ -9862,14 +10591,14 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue ShlAmt = DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy(AndLHS.getValueType())); - SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is either // all-ones, or zero. SDValue ShrAmt = DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy(Shl.getValueType())); - SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt); + SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); return DAG.getNode(ISD::AND, DL, VT, Shr, N3); } @@ -9889,21 +10618,21 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // NOTE: Don't create a SETCC if it's not legal on this target. if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, - LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) { + LegalTypes ? 
getSetCCResultType(N0.getValueType()) : MVT::i1)) { SDValue Temp, SCC; // cast from setcc result type to select result type if (LegalTypes) { - SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), + SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); if (N2.getValueType().bitsLT(SCC.getValueType())) - Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), + Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), N2.getValueType()); else - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC); } else { - SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC); } @@ -9914,9 +10643,10 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return Temp; // shl setcc result by log2 n2c - return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy(Temp.getValueType()))); + return DAG.getNode( + ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy(Temp.getValueType()))); } } @@ -9926,8 +10656,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { EVT XType = N0.getValueType(); if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { - SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); + TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { + SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC); if (Res.getValueType() != VT) Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); return Res; @@ -9937,16 +10667,16 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, XType))) { - SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); + SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); return DAG.getNode(ISD::SRL, DL, XType, Ctlz, DAG.getConstant(Log2_32(XType.getSizeInBits()), getShiftAmountTy(Ctlz.getValueType()))); } // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { - SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(), + SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0), XType, DAG.getConstant(0, XType), N0); - SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType); + SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType); return DAG.getNode(ISD::SRL, DL, XType, DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), DAG.getConstant(XType.getSizeInBits()-1, @@ -9954,7 +10684,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { - SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, + SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); @@ -9980,11 +10710,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, EVT XType = N0.getValueType(); if (SubC && 
SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, + SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); - SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), + SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), XType, N0, Shift); AddToWorkList(Shift.getNode()); AddToWorkList(Add.getNode()); @@ -9998,7 +10728,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, - DebugLoc DL, bool foldBooleans) { + SDLoc DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); @@ -10072,17 +10802,20 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, /// isAlias - Return true if there is any possibility that the two addresses /// overlap. -bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, +bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, + SDValue Ptr2, int64_t Size2, bool IsVolatile2, const Value *SrcValue2, int SrcValueOffset2, unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const { // If they are the same then they must be aliases. if (Ptr1 == Ptr2) return true; + // If they are both volatile then they cannot be reordered. + if (IsVolatile1 && IsVolatile2) return true; + // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; @@ -10127,7 +10860,9 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, return false; } - if (CombinerGlobalAA) { + bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA && SrcValue1 && SrcValue2) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; @@ -10146,24 +10881,25 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) { SDValue Ptr0, Ptr1; int64_t Size0, Size1; + bool IsVolatile0, IsVolatile1; const Value *SrcValue0, *SrcValue1; int SrcValueOffset0, SrcValueOffset1; unsigned SrcValueAlign0, SrcValueAlign1; const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1; - FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0, + FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, SrcValueAlign0, SrcTBAAInfo0); - FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1, + FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, SrcValueAlign1, SrcTBAAInfo1); - return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0, + return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, SrcValueAlign0, SrcTBAAInfo0, - Ptr1, Size1, SrcValue1, SrcValueOffset1, + Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, SrcValueAlign1, SrcTBAAInfo1); } /// FindAliasInfo - Extracts the relevant alias information from the memory -/// node. Returns true if the operand was a load. +/// node. Returns true if the operand was a nonvolatile load. 
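// [Sketch, not part of the patch] The select_cc(setlt X, 0, ...) fold in
// SimplifySelectCC above pairs SRA(X, bits-1) with an ADD; the elided tail
// of that hunk completes the classic branchless abs with an XOR. Standalone
// form of the identity (assumes arithmetic right shift on signed values):
#include <cstdint>
static inline int32_t abs_sketch(int32_t x) {
  int32_t mask = x >> 31;      // all-ones when x < 0, zero otherwise
  return (x + mask) ^ mask;    // identity when mask == 0, -x when mask == -1
}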
bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, + SDValue &Ptr, int64_t &Size, bool &IsVolatile, const Value *&SrcValue, int &SrcValueOffset, unsigned &SrcValueAlign, @@ -10172,29 +10908,31 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, Ptr = LS->getBasePtr(); Size = LS->getMemoryVT().getSizeInBits() >> 3; + IsVolatile = LS->isVolatile(); SrcValue = LS->getSrcValue(); SrcValueOffset = LS->getSrcValueOffset(); SrcValueAlign = LS->getOriginalAlignment(); TBAAInfo = LS->getTBAAInfo(); - return isa<LoadSDNode>(LS); + return isa<LoadSDNode>(LS) && !IsVolatile; } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, - SmallVector<SDValue, 8> &Aliases) { + SmallVectorImpl<SDValue> &Aliases) { SmallVector<SDValue, 8> Chains; // List of chains to visit. SmallPtrSet<SDNode *, 16> Visited; // Visited node set. // Get alias information for node. SDValue Ptr; int64_t Size; + bool IsVolatile; const Value *SrcValue; int SrcValueOffset; unsigned SrcValueAlign; const MDNode *SrcTBAAInfo; - bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, - SrcValueAlign, SrcTBAAInfo); + bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue, + SrcValueOffset, SrcValueAlign, SrcTBAAInfo); // Starting off. Chains.push_back(OriginalChain); @@ -10235,20 +10973,21 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // Get alias information for Chain. SDValue OpPtr; int64_t OpSize; + bool OpIsVolatile; const Value *OpSrcValue; int OpSrcValueOffset; unsigned OpSrcValueAlign; const MDNode *OpSrcTBAAInfo; bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpSrcValue, OpSrcValueOffset, + OpIsVolatile, OpSrcValue, OpSrcValueOffset, OpSrcValueAlign, OpSrcTBAAInfo); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, - SrcTBAAInfo, - OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, + isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset, + SrcValueAlign, SrcTBAAInfo, + OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset, OpSrcValueAlign, OpSrcTBAAInfo)) { Aliases.push_back(Chain); } else { @@ -10298,7 +11037,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return Aliases[0]; // Construct a custom tailored token factor. - return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, &Aliases[0], Aliases.size()); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index e096a23..a6f7461 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -41,6 +41,7 @@ #define DEBUG_TYPE "isel" #include "llvm/CodeGen/FastISel.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Analysis.h" @@ -89,18 +90,16 @@ bool FastISel::LowerArguments() { // Fallback to SDISel argument lowering code to deal with sret pointer // parameter. return false; - + if (!FastLowerArguments()) return false; - // Enter non-dead arguments into ValueMap for uses in non-entry BBs. + // Enter arguments into ValueMap for uses in non-entry BBs. 
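// [Sketch, not part of the patch] The IsVolatile plumbing added to isAlias
// and FindAliasInfo above installs one hard rule ahead of any offset
// reasoning: two volatile accesses are never reordered. Simplified model of
// the decision (hypothetical types, byte granularity, conservative on
// unrelated bases):
struct MemRef { const char *base; long long off, size; bool vol; };
static bool mayAlias(const MemRef &a, const MemRef &b) {
  if (a.vol && b.vol) return true;                   // keep program order
  if (a.base != b.base) return true;                 // unknown: be conservative
  return a.off < b.off + b.size && b.off < a.off + a.size;  // range overlap
}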
for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), E = FuncInfo.Fn->arg_end(); I != E; ++I) { - if (!I->use_empty()) { - DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); - assert(VI != LocalValueMap.end() && "Missed an argument?"); - FuncInfo.ValueMap[I] = VI->second; - } + DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); + assert(VI != LocalValueMap.end() && "Missed an argument?"); + FuncInfo.ValueMap[I] = VI->second; } return true; } @@ -598,7 +597,10 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); - if (!DIVariable(DI->getVariable()).Verify() || + DIVariable DIVar(DI->getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; @@ -610,16 +612,16 @@ bool FastISel::SelectCall(const User *I) { return true; } - unsigned Reg = 0; unsigned Offset = 0; - if (const Argument *Arg = dyn_cast<Argument>(Address)) { + Optional<MachineOperand> Op; + if (const Argument *Arg = dyn_cast<Argument>(Address)) // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); - if (Offset) - Reg = TRI.getFrameRegister(*FuncInfo.MF); - } - if (!Reg) - Reg = lookUpRegForValue(Address); + if (Offset) + Op = MachineOperand::CreateFI(Offset); + if (!Op) + if (unsigned Reg = lookUpRegForValue(Address)) + Op = MachineOperand::CreateReg(Reg, false); // If we have a VLA that has a "use" in a metadata node that's then used // here but it has no other uses, then we have a problem. E.g., @@ -632,20 +634,29 @@ bool FastISel::SelectCall(const User *I) { // If we assign 'a' a vreg and fast isel later on has to use the selection // DAG isel, it will want to copy the value to the vreg. However, there are // no uses, which goes counter to what selection DAG isel expects. - if (!Reg && !Address->use_empty() && isa<Instruction>(Address) && + if (!Op && !Address->use_empty() && isa<Instruction>(Address) && (!isa<AllocaInst>(Address) || !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) - Reg = FuncInfo.InitializeRegForValue(Address); - - if (Reg) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(Reg, RegState::Debug).addImm(Offset) - .addMetadata(DI->getVariable()); - else + Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address), + false); + + if (Op) { + if (Op->isReg()) { + Op->setIsDebug(true); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, + DI->getVariable()); + } else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op) + .addImm(0) + .addMetadata(DI->getVariable()); + } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. 
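// [Sketch, not part of the patch] The Optional<MachineOperand> rewrite of
// dbg_declare lowering above is a fallback chain: prefer an argument's
// frame index, then a known vreg, else drop the debug info. Same shape with
// C++17 std::optional (fi/reg stand in for the results of
// getArgumentFrameIndex and lookUpRegForValue, 0 meaning "not found"):
#include <optional>
std::optional<int> pickDbgOperand(int fi, int reg) {
  if (fi)  return fi;     // frame-index operand (addFrameIndex path)
  if (reg) return reg;    // register operand (DBG_VALUE reg form)
  return std::nullopt;    // nothing known: the dbg_declare is dropped
}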
- DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + } return true; } case Intrinsic::dbg_value: { @@ -673,13 +684,14 @@ bool FastISel::SelectCall(const User *I) { .addFPImm(CF).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addReg(Reg, RegState::Debug).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + // FIXME: This does not handle register-indirect values at offset 0. + bool IsIndirect = DI->getOffset() != 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, + Reg, DI->getOffset(), DI->getVariable()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); } return true; } @@ -1559,4 +1571,19 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI); } +bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { + // Must be an add. + if (!isa<AddOperator>(Add)) + return false; + // Type size needs to match. + if (TD.getTypeSizeInBits(GEP->getType()) != + TD.getTypeSizeInBits(Add->getType())) + return false; + // Must be in the same basic block. + if (isa<Instruction>(Add) && + FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB) + return false; + // Must have a constant operand. + return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1)); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b46edad..4309dc1 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -55,21 +55,19 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { return false; } -FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) - : TLI(tli) { -} - void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { + const TargetLowering *TLI = TM.getTargetLowering(); + Fn = &fn; MF = &mf; RegInfo = &MF->getRegInfo(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, TLI); - CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), - Outs, Fn->getContext()); + GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI); + CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, + Fn->isVarArg(), + Outs, Fn->getContext()); // Initialize the mapping of values to registers. 
This is only set up for // instruction values that are used outside of the block that defines @@ -79,9 +77,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { Type *Ty = AI->getAllocatedType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. @@ -114,8 +112,11 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // in a predictable order. if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { MachineModuleInfo &MMI = MF->getMMI(); + DIVariable DIVar(DI->getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); if (MMI.hasDebugInfo() && - DIVariable(DI->getVariable()).Verify() && + DIVar && !DI->getDebugLoc().isUnknown()) { // Don't handle byval struct arguments or VLAs, for example. // Non-byval arguments are handled here (they refer to the stack @@ -167,10 +168,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { assert(PHIReg && "PHI node does not have an assigned virtual register!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, PN->getType(), ValueVTs); + ComputeValueVTs(*TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT); + unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i); @@ -208,7 +209,8 @@ void FunctionLoweringInfo::clear() { /// CreateReg - Allocate a single virtual register for the given type. unsigned FunctionLoweringInfo::CreateReg(MVT VT) { - return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); + return RegInfo-> + createVirtualRegister(TM.getTargetLowering()->getRegClassFor(VT)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -219,15 +221,17 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) { /// will assign registers for each member or element. 
/// unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { + const TargetLowering *TLI = TM.getTargetLowering(); + SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, Ty, ValueVTs); + ComputeValueVTs(*TLI, Ty, ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; - MVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); + MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT); - unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT); + unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { unsigned R = CreateReg(RegisterVT); if (!FirstReg) FirstReg = R; @@ -266,15 +270,17 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { if (!Ty->isIntegerTy() || Ty->isVectorTy()) return; + const TargetLowering *TLI = TM.getTargetLowering(); + SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(TLI, Ty, ValueVTs); + ComputeValueVTs(*TLI, Ty, ValueVTs); assert(ValueVTs.size() == 1 && "PHIs with non-vector integer types should have a single VT."); EVT IntVT = ValueVTs[0]; - if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1) + if (TLI->getNumRegisters(PN->getContext(), IntVT) != 1) return; - IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT); + IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT); unsigned BitWidth = IntVT.getSizeInBits(); unsigned DestReg = ValueMap[PN]; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 3b1abd7..3a8fb85 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -211,6 +212,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + unsigned NumResults = CountResults(Node); for (unsigned i = 0; i < II.getNumDefs(); ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination @@ -218,6 +220,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, unsigned VRBase = 0; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); + // If the register class is unknown for the given definition, then try to + // infer one from the value type. + if (!RC && i < NumResults) + RC = TLI->getRegClassFor(Node->getSimpleValueType(i)); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -639,8 +645,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. 
- unsigned FrameIx = SD->getFrameIx(); - return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); + return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SD->getFrameIx()).addImm(Offset).addMetadata(MDPtr); } // Otherwise, we're going to create an instruction here. const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); @@ -678,7 +684,13 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U); } - MIB.addImm(Offset).addMetadata(MDPtr); + if (Offset != 0) // Indirect addressing. + MIB.addImm(Offset); + else + MIB.addReg(0U, RegState::Debug); + + MIB.addMetadata(MDPtr); + return &*MIB; } @@ -716,10 +728,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); + unsigned NumDefs = II.getNumDefs(); + const uint16_t *ScratchRegs = NULL; + + // Handle PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::PATCHPOINT) { + unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); + NumDefs = NumResults; + ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); + } + unsigned NumImpUses = 0; unsigned NodeOperands = - countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); - bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; + countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -742,14 +764,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. - bool HasOptPRefs = II.getNumDefs() > NumResults; + bool HasOptPRefs = NumDefs > NumResults; assert((!HasOptPRefs || !HasPhysRegOuts) && "Unable to cope with optional defs and phys regs defs!"); - unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; + unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) - AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + // Add scratch registers as implicit def and early clobber + if (ScratchRegs) + for (unsigned i = 0; ScratchRegs[i]; ++i) + MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine | + RegState::EarlyClobber); + // Transfer all of the memory reference descriptions of this instruction. MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), cast<MachineSDNode>(Node)->memoperands_end()); @@ -778,8 +806,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Additional results must be physical register defs. if (HasPhysRegOuts) { - for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + for (unsigned i = NumDefs; i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. 
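// [Sketch, not part of the patch] The PATCHPOINT handling above receives
// the convention's scratch registers as a 0-terminated uint16_t array and
// appends each as an implicit-def, early-clobber operand. Walking that
// contract, with stand-in register encodings:
#include <cstdint>
#include <cstdio>
int main() {
  static const uint16_t scratch[] = {12, 13, 0};   // hypothetical encodings
  for (const uint16_t *r = scratch; *r; ++r)       // stop at the 0 sentinel
    std::printf("scratch r%u: implicit-def, early-clobber\n", *r);
}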
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index a9c2203..920dda8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -1,4 +1,4 @@ -//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==// +//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2a1d8c2..9061ae9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -58,6 +58,10 @@ class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { /// LegalizedNodes - The set of nodes which have already been legalized. SmallPtrSet<SDNode *, 16> LegalizedNodes; + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(*DAG.getContext(), VT); + } + // Libcall insertion helpers. public: @@ -79,24 +83,24 @@ private: /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, - SDValue Idx, DebugLoc dl); + SDValue Idx, SDLoc dl); SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, - SDValue Idx, DebugLoc dl); + SDValue Idx, SDLoc dl); /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> - SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, + SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, SDValue N1, SDValue N2, ArrayRef<int> Mask) const; - void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - DebugLoc dl); + bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + bool &NeedInvert, SDLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, - unsigned NumOps, bool isSigned, DebugLoc dl); + unsigned NumOps, bool isSigned, SDLoc dl); std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); @@ -113,21 +117,21 @@ private: void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); - SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, SDLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue ExpandFCOPYSIGN(SDNode *Node); SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, - DebugLoc dl); + SDLoc dl); SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, - DebugLoc dl); + SDLoc dl); SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, - DebugLoc dl); + SDLoc dl); - SDValue ExpandBSWAP(SDValue Op, DebugLoc dl); - SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl); + SDValue ExpandBSWAP(SDValue Op, SDLoc dl); + SDValue ExpandBitCount(unsigned 
Opc, SDValue Op, SDLoc dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandInsertToVectorThroughStack(SDValue Op); @@ -181,7 +185,7 @@ public: /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue -SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, +SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, SDValue N1, SDValue N2, ArrayRef<int> Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); @@ -247,7 +251,7 @@ void SelectionDAGLegalize::LegalizeDAG() { SDValue SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { bool Extend = false; - DebugLoc dl = CFP->getDebugLoc(); + SDLoc dl(CFP); // If a FP immediate is precise when represented as a float and if the // target can do an extending load from float to double, we put it into @@ -307,7 +311,9 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); - DebugLoc dl = ST->getDebugLoc(); + unsigned AS = ST->getAddressSpace(); + + SDLoc dl(ST); if (ST->getMemoryVT().isFloatingPoint() || ST->getMemoryVT().isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); @@ -339,7 +345,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, MachinePointerInfo(), StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy(AS)); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -377,7 +383,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, .getWithOffset(Offset), MemVT, ST->isVolatile(), ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset))); + MinAlign(ST->getAlignment(), Offset), + ST->getTBAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -404,13 +411,14 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, ST->getPointerInfo(), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, TLI.getPointerTy(AS))); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), - Alignment); + Alignment, ST->getTBAAInfo()); SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -428,16 +436,14 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); EVT LoadedVT = LD->getMemoryVT(); - DebugLoc dl = LD->getDebugLoc(); + SDLoc dl(LD); if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. 
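// [Sketch, not part of the patch] ExpandUnalignedStore above (with TBAA
// info now threaded through) has two shapes: split into two naturally
// aligned halves, or bounce through an aligned stack slot and copy out in
// safe-sized pieces. Portable form of the bounce, with memcpy playing the
// role of the aligned slot store:
#include <cstdint>
#include <cstring>
void storeUnaligned64(uint64_t v, unsigned char *p) {
  unsigned char slot[sizeof v];             // aligned stack temporary
  std::memcpy(slot, &v, sizeof v);          // store value to the slot
  for (std::size_t i = 0; i < sizeof v; ++i)
    p[i] = slot[i];                         // copy out piece by piece
}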
- SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), - LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, + LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (LoadedVT != VT) Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : @@ -470,7 +476,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset)); + MinAlign(LD->getAlignment(), Offset), + LD->getTBAAInfo()); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, MachinePointerInfo(), false, false, 0)); @@ -488,7 +495,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->isVolatile(), LD->isNonTemporal(), - MinAlign(LD->getAlignment(), Offset)); + MinAlign(LD->getAlignment(), Offset), + LD->getTBAAInfo()); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. @@ -532,23 +540,25 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment); + LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), + LD->getTBAAInfo()); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment); + LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), + LD->getTBAAInfo()); } // aggregate the two parts @@ -570,7 +580,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, /// the insert there, and then read the result back. SDValue SelectionDAGLegalize:: PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, - DebugLoc dl) { + SDLoc dl) { SDValue Tmp1 = Vec; SDValue Tmp2 = Val; SDValue Tmp3 = Idx; @@ -606,13 +616,13 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, false, false, 0); // Load the updated vector. 
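// [Sketch, not part of the patch] PerformInsertVectorEltInMemory above
// spills the whole vector, stores the one element at StackPtr + Idx *
// EltSize, and reloads the vector. Scalar-memory equivalent (assumes idx
// is in range, as the DAG node's semantics require):
#include <cstring>
void insertLane(float *vec4, unsigned idx, float val) {
  float slot[4];
  std::memcpy(slot, vec4, sizeof slot);   // store vector to a stack slot
  slot[idx] = val;                        // write the single lane
  std::memcpy(vec4, slot, sizeof slot);   // load the updated vector back
}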
return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, + MachinePointerInfo::getFixedStack(SPFI), false, false, false, 0); } SDValue SelectionDAGLegalize:: -ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) { +ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl) { if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) { // SCALAR_TO_VECTOR requires that the type of the value being inserted // match the element type of the vector being created, except for @@ -651,7 +661,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - DebugLoc dl = ST->getDebugLoc(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); + SDLoc dl(ST); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && TLI.isTypeLegal(MVT::i32)) { @@ -659,7 +670,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { bitcastToAPInt().zextOrTrunc(32), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -668,7 +679,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -681,12 +692,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(4)); + DAG.getConstant(4, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); + isVolatile, isNonTemporal, MinAlign(Alignment, 4U), + TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -699,11 +711,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { StoreSDNode *ST = cast<StoreSDNode>(Node); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { @@ -741,7 +754,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -763,7 +776,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + NVT, isVolatile, isNonTemporal, Alignment, + TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & 
(StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -784,19 +798,20 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), RoundVT, - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, + TBAAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); } else { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -805,16 +820,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, isVolatile, isNonTemporal, Alignment); + RoundVT, isVolatile, isNonTemporal, Alignment, + TBAAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); } // The order of the stores doesn't matter. @@ -850,7 +866,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -863,7 +879,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { SDValue Chain = LD->getChain(); // The chain. SDValue Ptr = LD->getBasePtr(); // The base pointer. SDValue Value; // The value returned by the load op. 
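// [Sketch, not part of the patch] The RoundWidth/ExtraWidth split above
// stores the bottom power-of-2 chunk, then the leftover bits at
// Ptr + RoundWidth/8 (the order flips on big-endian targets). Little-endian
// form for a 24-bit truncating store done as i16 + i8:
#include <cstdint>
#include <cstring>
void store_i24_le(uint32_t v, unsigned char *p) {
  uint16_t lo = uint16_t(v);         // bottom RoundWidth = 16 bits
  uint8_t  hi = uint8_t(v >> 16);    // remaining ExtraWidth = 8 bits
  std::memcpy(p, &lo, 2);
  std::memcpy(p + 2, &hi, 1);
}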
- DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { @@ -898,9 +914,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(NVT.getSizeInBits() == VT.getSizeInBits() && "Can only promote loads to same size type"); - SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand()); RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); RChain = Res.getValue(1); break; @@ -920,6 +934,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually @@ -946,7 +961,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); Ch = Result.getValue(1); // The chain. @@ -983,16 +998,16 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1012,17 +1027,17 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of // the other one. 
@@ -1075,9 +1090,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { case TargetLowering::Expand: if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, - LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + LD->getMemOperand()); unsigned ExtendOp; switch (ExtType) { case ISD::EXTLOAD: @@ -1105,9 +1118,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Chain, Ptr, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + Chain, Ptr, SrcVT, + LD->getMemOperand()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1249,7 +1261,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Expand) { // replace ISD::DEBUGTRAP with ISD::TRAP SDValue NewVal; - NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(), + NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(), Node->getOperand(0)); ReplaceNode(Node, NewVal.getNode()); LegalizeOp(NewVal.getNode()); @@ -1370,7 +1382,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Store the value to a temporary stack slot, then LOAD the returned part. SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, @@ -1382,11 +1394,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, Idx.getValueType())); - if (Idx.getValueType().bitsGT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); - else - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); - + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); if (Op.getValueType().isVector()) @@ -1404,7 +1412,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue Vec = Op.getOperand(0); SDValue Part = Op.getOperand(1); SDValue Idx = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Store the value to a temporary stack slot, then LOAD the returned part. @@ -1424,11 +1432,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, Idx.getValueType())); - - if (Idx.getValueType().bitsGT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); - else - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); @@ -1449,7 +1453,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // Create the stack frame object. 
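// [Sketch, not part of the patch] The unsupported-extload path above turns
// into EXTLOAD plus an explicit SIGN_EXTEND_INREG, and that node is
// equivalent to the shift pair (x << k) >> k with k = width - srcbits.
// Sign-extending the low 8 bits of an i32 (assumes the usual arithmetic
// right shift and two's complement):
#include <cstdint>
int32_t sext_inreg8(int32_t x) {
  return int32_t(uint32_t(x) << 24) >> 24;   // bit 7 replicated into 31..8
}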
EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); @@ -1489,12 +1493,12 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); SDValue Tmp1 = Node->getOperand(0); SDValue Tmp2 = Node->getOperand(1); @@ -1527,7 +1531,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), - LoadPtr, DAG.getIntPtrConstant(ByteOffset)); + LoadPtr, + DAG.getConstant(ByteOffset, LoadPtr.getValueType())); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1542,16 +1547,16 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { } } // Now get the sign bit proper, by seeing whether the value is negative. - SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()), + SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()), SignBit, DAG.getConstant(0, SignBit.getValueType()), ISD::SETLT); // Get the absolute value of the result. SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); // Select between the nabs and abs value based on the sign bit of // the input. - return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1559,7 +1564,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" " not tell us which reg is the stack pointer!"); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); EVT VT = Node->getValueType(0); SDValue Tmp1 = SDValue(Node, 0); SDValue Tmp2 = SDValue(Node, 1); @@ -1568,52 +1573,76 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. 
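// [Sketch, not part of the patch] ExpandFCOPYSIGN above tests the sign with
// an integer SETLT against zero, then selects (now via DAG.getSelect)
// between abs and its negation. Standalone equivalent for binary64,
// assuming IEEE-754 layout:
#include <cmath>
#include <cstdint>
#include <cstring>
double copysign_sketch(double mag, double sgn) {
  int64_t bits;
  std::memcpy(&bits, &sgn, sizeof bits);
  double a = std::fabs(mag);
  return bits < 0 ? -a : a;    // select(setlt signbits 0, -abs, abs)
}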
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true), + SDLoc(Node)); SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); - if (Align > StackAlign) - SP = DAG.getNode(ISD::AND, dl, VT, SP, - DAG.getConstant(-(uint64_t)Align, VT)); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + if (Align > StackAlign) + Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, + DAG.getConstant(-(uint64_t)Align, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), SDValue()); + DAG.getIntPtrConstant(0, true), SDValue(), + SDLoc(Node)); Results.push_back(Tmp1); Results.push_back(Tmp2); } /// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and -/// condition code CC on the current target. This routine expands SETCC with -/// illegal condition code into AND / OR of multiple SETCC values. -void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, +/// condition code CC on the current target. +/// +/// If the SETCC has been legalized using AND / OR, then the legalized node +/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert +/// will be set to false. +/// +/// If the SETCC has been legalized by using getSetCCSwappedOperands(), +/// then the values of LHS and RHS will be swapped, CC will be set to the +/// new condition, and NeedInvert will be set to false. +/// +/// If the SETCC has been legalized using the inverse condcode, then LHS and +/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert +/// will be set to true. The caller must invert the result of the SETCC with +/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a +/// true/false result. +/// +/// \returns true if the SetCC has been legalized, false if it hasn't. +bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - DebugLoc dl) { + bool &NeedInvert, + SDLoc dl) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + NeedInvert = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. 
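// [Sketch, not part of the patch] The DYNAMIC_STACKALLOC reorder above
// applies the alignment mask to SP - Size instead of to SP, so the returned
// block itself is aligned. The address arithmetic, with kStackAlign
// standing in for the target's stack alignment:
#include <cstdint>
uintptr_t dynAllocTop(uintptr_t sp, uintptr_t size,
                      uintptr_t align, uintptr_t kStackAlign) {
  uintptr_t p = sp - size;                      // grow-down allocation
  if (align > kStackAlign) p &= ~(align - 1);   // round down to 'align'
  return p;                                     // new SP and block address
}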
break; case TargetLowering::Expand: { + ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + std::swap(LHS, RHS); + CC = DAG.getCondCode(InvCC); + return true; + } ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; - ISD::CondCode InvCC = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); - case ISD::SETO: + case ISD::SETO: assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) == TargetLowering::Legal && "If SETO is expanded, SETOEQ must be legal!"); CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; - case ISD::SETUO: + case ISD::SETUO: assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) == TargetLowering::Legal && "If SETUO is expanded, SETUNE must be legal!"); @@ -1623,12 +1652,12 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, case ISD::SETOGE: case ISD::SETOLT: case ISD::SETOLE: - case ISD::SETONE: - case ISD::SETUEQ: - case ISD::SETUNE: - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETULT: + case ISD::SETONE: + case ISD::SETUEQ: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: case ISD::SETULE: // If we are floating point, assign and break, otherwise fall through. if (!OpVT.isInteger()) { @@ -1644,20 +1673,23 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, case ISD::SETGT: case ISD::SETGE: case ISD::SETLT: + // We only support using the inverted operation, which is computed above + // and not a different manner of supporting expanding these cases. + llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETNE: case ISD::SETEQ: - InvCC = ISD::getSetCCSwappedOperands(CCCode); - if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) { - // We only support using the inverted operation and not a - // different manner of supporting expanding these cases. - llvm_unreachable("Don't know how to expand this condition!"); + // Try inverting the result of the inverse condition. + InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + CC = DAG.getCondCode(InvCC); + NeedInvert = true; + return true; } - LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC); - RHS = SDValue(); - CC = SDValue(); - return; + // If inverting the condition didn't work then we have no means to expand + // the condition. + llvm_unreachable("Don't know how to expand this condition!"); } - + SDValue SetCC1, SetCC2; if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, @@ -1672,9 +1704,10 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); - break; + return true; } } + return false; } /// EmitStackConvert - Emit a store/load combination to the stack. This stores @@ -1684,7 +1717,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, - DebugLoc dl) { + SDLoc dl) { // Create the stack frame object. unsigned SrcAlign = TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType(). @@ -1725,7 +1758,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); // Create a vector sized/aligned stack slot, store the value to element #0, // then load the whole vector back out. 
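// [Sketch, not part of the patch] The new NeedInvert protocol above
// legalizes an unsupported SETCC by emitting the inverse condition and
// asking the caller to NOT the result. This stays sound in the presence of
// NaNs because, for example, SETULT is exactly the negation of SETOGE:
bool setoge(double a, double b) { return a >= b; }      // false if unordered
bool setult(double a, double b) { return !(a >= b); }   // true if unordered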
SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0)); @@ -1749,7 +1782,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned NumElems = Node->getNumOperands(); SDValue Value1, Value2; - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); EVT VT = Node->getValueType(0); EVT OpVT = Node->getOperand(0).getValueType(); EVT EltVT = VT.getVectorElementType(); @@ -1881,7 +1914,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Node->getDebugLoc()); + Callee, Args, DAG, SDLoc(Node)); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -1896,7 +1929,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, /// and returning a result of type RetVT. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, - bool isSigned, DebugLoc dl) { + bool isSigned, SDLoc dl) { TargetLowering::ArgListTy Args; Args.reserve(NumOps); @@ -1950,7 +1983,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Node->getDebugLoc()); + Callee, Args, DAG, SDLoc(Node)); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo; @@ -1963,7 +1996,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; @@ -1981,7 +2014,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; @@ -1996,7 +2029,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; @@ -2043,7 +2076,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned = Opcode == ISD::SDIVREM; RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? 
RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; @@ -2082,7 +2115,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); TargetLowering:: CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, @@ -2100,7 +2133,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, /// isSinCosLibcallAvailable - Return true if sincos libcall is available. static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; @@ -2130,7 +2163,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN ? ISD::FCOS : ISD::FSIN; - + SDValue Op0 = Node->getOperand(0); for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), UE = Op0.getNode()->use_end(); UI != UE; ++UI) { @@ -2150,7 +2183,7 @@ void SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; @@ -2158,25 +2191,25 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, case MVT::f128: LC = RTLIB::SINCOS_F128; break; case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; } - + // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. SDValue InChain = DAG.getEntryNode(); - + EVT RetVT = Node->getValueType(0); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - + TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + // Pass the argument. Entry.Node = Node->getOperand(0); Entry.Ty = RetTy; Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + // Pass the return address of sin. SDValue SinPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = SinPtr; @@ -2184,7 +2217,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + // Also pass the return address of the cos. 
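// [Sketch, not part of the patch] ExpandSinCosLibCall above rewrites a
// paired FSIN/FCOS of the same operand as one libcall taking two
// out-pointers backed by stack temporaries. The f32 signature it targets is
// the GNU libm extension (availability is platform-dependent):
extern "C" void sincosf(float x, float *sinx, float *cosx);
void polar(float t, float &s, float &c) { sincosf(t, &s, &c); }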
SDValue CosPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = CosPtr; @@ -2192,11 +2225,11 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - - DebugLoc dl = Node->getDebugLoc(); + + SDLoc dl(Node); TargetLowering:: CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, @@ -2218,7 +2251,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, - DebugLoc dl) { + SDLoc dl) { if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2226,11 +2259,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); // word offset constant for Hi/Lo address computation - SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy()); + SDValue WordOff = DAG.getConstant(sizeof(int), StackSlot.getValueType()); // set up Hi and Lo (into buffer) address based on endian SDValue Hi = StackSlot; - SDValue Lo = DAG.getNode(ISD::ADD, dl, - TLI.getPointerTy(), StackSlot, WordOff); + SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), + StackSlot, WordOff); if (TLI.isLittleEndian()) std::swap(Hi, Lo); @@ -2327,9 +2360,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // select. We happen to get lucky and machinesink does the right // thing most of the time. This would be a good candidate for a //pseudo-op, or, even better, for whole-function isel. - SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); - return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast); + return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast); } // Otherwise, implement the fully general conversion. 
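// The "simple 32-bit [signed|unsigned] integer to float/double expansion"
// above writes the integer word and a magic exponent word into adjacent
// stack slots, loads the pair as an f64, and subtracts a bias. A host-side
// model of the unsigned flavor of that bit trick, assuming IEEE-754 doubles
// (the legalizer's signed variant additionally XORs the sign bit):
#include <cstdint>
#include <cstring>
#include <cstdio>

static double u32_to_f64(uint32_t x) {
  uint64_t bits = (UINT64_C(0x43300000) << 32) | x; // the double 2^52 + x
  double d;
  std::memcpy(&d, &bits, sizeof d);
  return d - 4503599627370496.0; // subtract 2^52, leaving exactly x
}

int main() {
  std::printf("%.1f\n", u32_to_f64(123456789u)); // prints 123456789.0
  return 0;
}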
@@ -2340,13 +2373,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, DAG.getConstant(UINT64_C(0x800), MVT::i64)); SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); - SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); - SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); - SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0); + SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), ISD::SETUGE); - SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); + SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, @@ -2365,18 +2398,18 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); - SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), + SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, Op0.getValueType()), ISD::SETLT); SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); - SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), + SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero); // If the sign bit of the integer is set, the large number will be treated // as a negative number. To counteract this, the dynamic code adds an // offset depending on the data type. uint64_t FF; - switch (Op0.getValueType().getSimpleVT().SimpleTy) { + switch (Op0.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) @@ -2389,7 +2422,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset); + CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); Alignment = std::min(Alignment, 4u); SDValue FudgeInReg; if (DestVT == MVT::f32) @@ -2417,7 +2450,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, - DebugLoc dl) { + SDLoc dl) { // First step, figure out the appropriate *INT_TO_FP operation to use. EVT NewInTy = LegalOp.getValueType(); @@ -2459,7 +2492,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, - DebugLoc dl) { + SDLoc dl) { // First step, figure out the appropriate FP_TO*INT operation to use. EVT NewOutTy = DestVT; @@ -2494,7 +2527,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, /// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. 
/// -SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { +SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; @@ -2542,7 +2575,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { /// ExpandBitCount - Expand the specified bitcount instruction into operations. /// SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, - DebugLoc dl) { + SDLoc dl) { switch (Opc) { default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { @@ -2650,6 +2683,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; } break; case ISD::ATOMIC_CMP_SWAP: @@ -2659,6 +2693,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; } break; case ISD::ATOMIC_LOAD_ADD: @@ -2668,6 +2703,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; } break; case ISD::ATOMIC_LOAD_SUB: @@ -2677,6 +2713,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; } break; case ISD::ATOMIC_LOAD_AND: @@ -2686,6 +2723,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; } break; case ISD::ATOMIC_LOAD_OR: @@ -2695,6 +2733,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; } break; case ISD::ATOMIC_LOAD_XOR: @@ -2704,6 +2743,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; } break; case ISD::ATOMIC_LOAD_NAND: @@ -2713,6 +2753,47 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; + } + 
break; + case ISD::ATOMIC_LOAD_MAX: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break; + } + break; + case ISD::ATOMIC_LOAD_UMAX: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break; + } + break; + case ISD::ATOMIC_LOAD_MIN: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break; + } + break; + case ISD::ATOMIC_LOAD_UMIN: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break; } break; } @@ -2722,8 +2803,9 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Results; - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; + bool NeedInvert; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: @@ -2913,7 +2995,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, VT); - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), Node->getOperand(0), Tmp1, ISD::SETLT); True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); @@ -2922,7 +3004,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Tmp1)); False = DAG.getNode(ISD::XOR, dl, NVT, False, DAG.getConstant(x, NVT)); - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False); + Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False); Results.push_back(Tmp1); break; } @@ -2934,27 +3016,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned Align = Node->getConstantOperandVal(3); SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, - MachinePointerInfo(V), + MachinePointerInfo(V), false, false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, DAG.getConstant(Align - 1, - TLI.getPointerTy())); + VAList.getValueType())); - VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, + VAList = 
DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, DAG.getConstant(-(int64_t)Align, - TLI.getPointerTy())); + VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg - Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, DAG.getConstant(TLI.getDataLayout()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), - TLI.getPointerTy())); + VAList.getValueType())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, MachinePointerInfo(V), false, false, 0); @@ -3025,7 +3107,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // cast operands to v8i32 and re-build the mask. // Calculate new VT, the size of the new VT should be equal to original. - EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, VT.getSizeInBits()/NewEltVT.getSizeInBits()); assert(NewVT.bitsEq(VT)); @@ -3065,11 +3147,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, - DAG.getIntPtrConstant(Idx))); + DAG.getConstant(Idx, TLI.getVectorIdxTy()))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, - DAG.getIntPtrConstant(Idx - NumElems))); + DAG.getConstant(Idx - NumElems, + TLI.getVectorIdxTy()))); } Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); @@ -3131,10 +3214,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = DAG.getConstantFP(0.0, VT); - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, ISD::SETUGT); Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); - Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3); + Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3); Results.push_back(Tmp1); break; } @@ -3224,6 +3307,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128)); break; + case ISD::FROUND: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); + break; case ISD::FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, @@ -3263,22 +3353,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandConstantFP(CFP, true)); break; } - case ISD::EHSELECTION: { - unsigned Reg = TLI.getExceptionSelectorRegister(); - assert(Reg && "Can't expand to unknown register!"); - Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg, - Node->getValueType(0))); - Results.push_back(Results[0].getValue(1)); - break; - } - case ISD::EXCEPTIONADDR: { - unsigned Reg = TLI.getExceptionPointerRegister(); - assert(Reg && "Can't expand to unknown register!"); - Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg, - Node->getValueType(0))); - Results.push_back(Results[0].getValue(1)); - break; - } case ISD::FSUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && @@ -3528,10 +3602,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy(BottomHalf.getValueType())); Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); - TopHalf = DAG.getSetCC(dl, 
TLI.getSetCCResultType(VT), TopHalf, Tmp1, + TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1, ISD::SETNE); } else { - TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, + TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, DAG.getConstant(0, VT), ISD::SETNE); } Results.push_back(BottomHalf); @@ -3574,9 +3648,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - Index = DAG.getNode(ISD::MUL, dl, PTy, - Index, DAG.getConstant(EntrySize, PTy)); - SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), + Index, DAG.getConstant(EntrySize, Index.getValueType())); + SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), + Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, @@ -3620,10 +3695,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); Tmp3 = Node->getOperand(2); - LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl); + bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, + Tmp3, NeedInvert, dl); + + if (Legalized) { + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SETCC node. + if (Tmp3.getNode()) + Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Tmp3); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0)); - // If we expanded the SETCC into an AND/OR, return the new node - if (Tmp2.getNode() == 0) { Results.push_back(Tmp1); break; } @@ -3654,14 +3740,52 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp4 = Node->getOperand(3); // False SDValue CC = Node->getOperand(4); - LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()), - Tmp1, Tmp2, CC, dl); + bool Legalized = false; + // Try to legalize by inverting the condition. This is for targets that + // might support an ordered version of a condition, but not the unordered + // version (or vice versa). + ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + Tmp1.getValueType().isInteger()); + if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { + // Use the new condition code and swap true and false + Legalized = true; + Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC); + } else { + // If The inverse is not legal, then try to swap the arguments using + // the inverse condition code. + ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC); + if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) { + // The swapped inverse condition is legal, so swap true and false, + // lhs and rhs. 
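// The two SELECT_CC rewrites used here rest on simple identities; a scalar
// model for CC = SETLT on integers (for FP, the point is that an inverted or
// operand-swapped condition may be legal on the target when the original
// condition code is not):
#include <cassert>

static int select_lt(int a, int b, int t, int f) { return a < b ? t : f; }
// invert the condition and swap the arms:
static int via_inverse(int a, int b, int t, int f) { return a >= b ? f : t; }
// swap the operands of the inverse and swap the arms as well:
static int via_swapped_inverse(int a, int b, int t, int f) { return b <= a ? f : t; }

int main() {
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b) {
      assert(select_lt(a, b, 1, 0) == via_inverse(a, b, 1, 0));
      assert(select_lt(a, b, 1, 0) == via_swapped_inverse(a, b, 1, 0));
    }
  return 0;
}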
+ Legalized = true; + Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC); + } + } + + if (!Legalized) { + Legalized = LegalizeSetCCCondCode( + getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, + dl); - assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!"); - Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); - CC = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, - Tmp3, Tmp4, CC); + assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); + + // If we expanded the SETCC by inverting the condition code, then swap + // the True/False operands to match. + if (NeedInvert) + std::swap(Tmp3, Tmp4); + + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SELECT_CC node. + if (CC.getNode()) { + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), + Tmp1, Tmp2, Tmp3, Tmp4, CC); + } else { + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, + Tmp3, Tmp4, CC); + } + } Results.push_back(Tmp1); break; } @@ -3671,14 +3795,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC - LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), - Tmp2, Tmp3, Tmp4, dl); - - assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); - Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); - Tmp4 = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, - Tmp3, Node->getOperand(4)); + bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( + Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); + (void)Legalized; + assert(Legalized && "Can't legalize BR_CC with legal condition!"); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0)); + + // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC + // node. 
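// BR_CC takes the same path below: when LegalizeSetCCCondCode had to invert
// the condition (NeedInvert), the intended test is recovered by wrapping the
// legal compare in a NOT. A model using ordered/unordered FP tests, where
// one is exactly the negation of the other:
#include <cassert>
#include <cmath>

static bool set_o(double a, double b) { return a == a && b == b; } // SETO
static bool set_uo(double a, double b) { return !set_o(a, b); }    // NOT(SETO) == SETUO

int main() {
  double nan = std::nan("");
  assert(set_uo(nan, 1.0));
  assert(!set_uo(1.0, 2.0));
  return 0;
}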
+ if (Tmp4.getNode()) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, + Tmp4, Tmp2, Tmp3, Node->getOperand(4)); + } else { + Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, + Tmp3, Node->getOperand(4)); + } Results.push_back(Tmp1); break; } @@ -3698,10 +3835,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + Node->getOperand(0), DAG.getConstant(Idx, + TLI.getVectorIdxTy())); SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Node->getOperand(1), DAG.getConstant(Idx, + TLI.getVectorIdxTy())); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } @@ -3738,7 +3877,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { OVT = Node->getOperand(0).getSimpleValueType(); } MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { case ISD::CTTZ: @@ -3753,11 +3892,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); if (Node->getOpcode() == ISD::CTTZ) { // FIXME: This should set a bit in the zero extended value instead. - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, - DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + Tmp1 = DAG.getSelect(dl, NVT, Tmp2, + DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); } else if (Node->getOpcode() == ISD::CTLZ || Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) @@ -3852,7 +3991,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); // Perform the larger operation, then round down. - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3); + Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3); if (TruncOp != ISD::FP_ROUND) Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index de217d8..ecf4c5d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -88,6 +88,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; @@ -118,7 +119,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N, SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { // Convert the inputs to integers, and build a new pair out of them. 
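// The Soften* routines entered here carry every f32/f64 value around as its
// integer bit pattern. SoftenFloatRes_FABS a little further down therefore
// becomes a plain AND that clears the sign bit; the equivalent host-side
// operation, shown as a sketch:
#include <cstdint>
#include <cstring>
#include <cassert>

static float soft_fabs(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits &= 0x7fffffffu; // clear bit 31, the IEEE-754 sign bit
  std::memcpy(&x, &bits, sizeof x);
  return x;
}

int main() {
  assert(soft_fabs(-2.5f) == 2.5f);
  assert(soft_fabs(2.5f) == 2.5f);
  return 0;
}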
- return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), BitConvertToInteger(N->getOperand(0)), @@ -133,7 +134,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NewOp.getValueType().getVectorElementType(), NewOp, N->getOperand(1)); } @@ -147,7 +148,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { API.clearBit(Size-1); SDValue Mask = DAG.getConstant(API, NVT); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); + return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask); } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { @@ -160,7 +161,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { @@ -172,13 +173,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT LVT = LHS.getValueType(); EVT RVT = RHS.getValueType(); @@ -226,7 +227,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { @@ -239,7 +240,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { @@ -251,7 +252,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { @@ -263,7 +264,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { @@ -275,7 +276,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { @@ -287,7 +288,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { @@ -299,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { RTLIB::LOG2_F80, 
RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { @@ -311,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -325,7 +326,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, N->getDebugLoc()); + NVT, Ops, 3, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { @@ -338,7 +339,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { @@ -350,7 +351,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { @@ -364,7 +365,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -372,7 +373,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -381,7 +382,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, - N->getDebugLoc()); + SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -389,7 +390,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { @@ -402,7 +403,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -416,7 +417,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { @@ -429,7 +430,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { RTLIB::REM_F80, 
RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { @@ -441,7 +442,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { @@ -453,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { @@ -465,7 +478,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { @@ -478,7 +491,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -490,21 +503,22 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue NewL; if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment()); + L->getPointerInfo(), NVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment(), + L->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -516,7 +530,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment(), + L->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
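// Each Soften helper above resolves to one of five runtime entry points
// keyed on the value type, which is the dispatch GetFPLibCall performs. A
// minimal model of that selection for the FROUND family; the f32/f64/f80
// names follow libm, but treat the mapping as illustrative:
#include <cstdio>

static const char *round_libcall(unsigned bits) {
  switch (bits) {
  case 32: return "roundf";
  case 64: return "round";
  case 80: return "roundl"; // x87 extended double
  default: return "<unsupported>";
  }
}

int main() {
  std::printf("f32 rounds via %s\n", round_libcall(32));
  return 0;
}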
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -526,14 +541,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), N->getOperand(0),LHS,RHS); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); } SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), N->getOperand(0), N->getOperand(1), LHS, RHS, N->getOperand(4)); } @@ -548,7 +563,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { SDValue Ptr = N->getOperand(1); // Get the pointer. EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue NewVAARG; NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), @@ -565,7 +580,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { EVT SVT = N->getOperand(0).getValueType(); EVT RVT = N->getValueType(0); EVT NVT = EVT(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly @@ -585,7 +600,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, false, dl); + &Op, 1, false, dl).first; } @@ -633,7 +648,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { } SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); } @@ -645,7 +660,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -655,7 +670,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
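// As the comment above says, softenSetCCOperands reduces an FP compare to an
// integer-returning libcall whose result is then tested against zero. A
// sketch with a hypothetical helper standing in for a compiler-rt comparison
// routine (its name and contract are assumptions for illustration only):
#include <cstdint>
#include <cstring>
#include <cassert>

static int fcmp_lt(uint32_t a_bits, uint32_t b_bits) {
  float a, b;
  std::memcpy(&a, &a_bits, sizeof a);
  std::memcpy(&b, &b_bits, sizeof b);
  return a < b ? 1 : 0; // nonzero means "less than" holds
}

int main() {
  uint32_t one = 0x3f800000u, two = 0x40000000u; // bit patterns of 1.0f, 2.0f
  assert(fcmp_lt(one, two) != 0); // the legalized SETCC tests (result != 0)
  return 0;
}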
@@ -676,7 +691,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { @@ -684,14 +699,14 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -701,7 +716,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -724,7 +739,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, use it. if (NewRHS.getNode() == 0) { @@ -744,7 +759,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "Can only soften the stored value!"); StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Val = ST->getValue(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (ST->isTruncatingStore()) // Do an FP_ROUND followed by a non-truncating store. 
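// SoftenFloatOp_STORE above splits a truncating f64 store into an FP_ROUND
// followed by an ordinary 32-bit store. The same sequence on the host:
#include <cstring>
#include <cstdio>

static void trunc_store_f64_as_f32(double v, unsigned char *mem) {
  float rounded = static_cast<float>(v);      // FP_ROUND
  std::memcpy(mem, &rounded, sizeof rounded); // non-truncating 4-byte store
}

int main() {
  unsigned char buf[4];
  trunc_store_f64_as_f32(3.141592653589793, buf);
  float f;
  std::memcpy(&f, buf, sizeof f);
  std::printf("%f\n", f); // 3.141593 after rounding to f32
  return 0;
}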
@@ -754,9 +769,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { Val = GetSoftenedFloat(Val); return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), - ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + ST->getMemOperand()); } @@ -817,6 +830,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; @@ -850,14 +864,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0) == MVT::ppcf128 && "Logic only correct for ppcf128!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Tmp; GetExpandedFloat(N->getOperand(0), Lo, Tmp); Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); // Lo = Hi==fabs(Hi) ? Lo : -Lo; - Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo, + Lo = DAG.getSelectCC(dl, Tmp, Hi, Lo, DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), - DAG.getCondCode(ISD::SETEQ)); + ISD::SETEQ); } void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, @@ -912,7 +926,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -986,7 +1000,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), N->getValueType(0), Ops, 3, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1000,7 +1014,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1018,7 +1032,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedFloat(N->getOperand(0), Lo, Hi); Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo); Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi); @@ -1027,7 +1041,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), NVT, N->getOperand(0)); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(NVT.getSizeInBits(), 0)), NVT); } @@ -1072,6 +1086,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = 
LibCallify(GetFPLibCall(N->getValueType(0), @@ -1102,7 +1128,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1127,15 +1153,14 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, LoadSDNode *LD = cast<LoadSDNode>(N); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, - LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->getMemoryVT(), LD->getMemOperand()); // Remember the chain. Chain = Hi.getValue(1); @@ -1157,7 +1182,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue Src = N->getOperand(0); EVT SrcVT = Src.getValueType(); bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // First do an SINT_TO_FP, whether the original was signed or unsigned. // When promoting partial word types to i32 we must honor the signedness, @@ -1181,7 +1206,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl); + Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1216,8 +1241,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), MVT::ppcf128)); - Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), - Lo, Hi, DAG.getCondCode(ISD::SETLT)); + Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, SrcVT), + Lo, Hi, ISD::SETLT); GetPairElements(Lo, Lo, Hi); } @@ -1251,6 +1276,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break; case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; @@ -1280,7 +1306,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - DebugLoc dl) { + SDLoc dl) { SDValue LHSLo, LHSHi, RHSLo, RHSHi; GetExpandedFloat(NewLHS, LHSLo, LHSHi); GetExpandedFloat(NewRHS, RHSLo, RHSHi); @@ -1293,14 +1319,14 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, // FCMPU crN, lo1, lo2 // The following can be improved, but not that much. 
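// FloatExpandSetCCOperands just below compares a ppcf128 value, expanded as
// a (hi, lo) pair of doubles, roughly as
//   (hi1 == hi2 && lo1 CC lo2) || (hi1 != hi2 && hi1 CC hi2)
// A scalar model for CC = SETLT; the SETOEQ/SETUNE NaN handling of the real
// code is elided from this sketch:
#include <cassert>

struct DD { double hi, lo; }; // illustrative stand-in for the two halves

static bool dd_lt(DD a, DD b) {
  return (a.hi == b.hi && a.lo < b.lo) || (a.hi != b.hi && a.hi < b.hi);
}

int main() {
  assert(dd_lt(DD{1.0, 0.0}, DD{1.0, 1e-30})); // equal highs: low part decides
  assert(dd_lt(DD{1.0, 9.0}, DD{2.0, -9.0}));  // unequal highs: high part decides
  return 0;
}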
SDValue Tmp1, Tmp2, Tmp3; - Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETOEQ); - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, CCCode); Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); - Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETUNE); - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, CCCode); Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); @@ -1310,7 +1336,7 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1325,19 +1351,30 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { N->getOperand(4)), 0); } +SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(1), Lo, Hi); + // The ppcf128 value is providing only the sign; take it from the + // higher-order double (which must have the larger magnitude). + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), + N->getValueType(0), N->getOperand(0), Hi); +} + SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { assert(N->getOperand(0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); SDValue Lo, Hi; GetExpandedFloat(N->getOperand(0), Lo, Hi); // Round it the rest of the way (e.g. to f32) if needed. - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), N->getValueType(0), Hi, N->getOperand(1)); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { EVT RVT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). FIXME: Do this in a less hacky way. @@ -1353,12 +1390,12 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl); + return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { EVT RVT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). FIXME: Do this in a less hacky way. @@ -1370,29 +1407,29 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); // X>=2^31 ? 
(int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. - return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp, - DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, - DAG.getNode(ISD::FSUB, dl, - MVT::ppcf128, - N->getOperand(0), - Tmp)), - DAG.getConstant(0x80000000, MVT::i32)), - DAG.getNode(ISD::FP_TO_SINT, dl, - MVT::i32, N->getOperand(0)), - DAG.getCondCode(ISD::SETGE)); + return DAG.getSelectCC(dl, N->getOperand(0), Tmp, + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, + DAG.getNode(ISD::FSUB, dl, + MVT::ppcf128, + N->getOperand(0), + Tmp)), + DAG.getConstant(0x80000000, MVT::i32)), + DAG.getNode(ISD::FP_TO_SINT, dl, + MVT::i32, N->getOperand(0)), + ISD::SETGE); } RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, - false, dl); + false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1410,7 +1447,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. if (NewRHS.getNode() == 0) { @@ -1444,8 +1481,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue Lo, Hi; GetExpandedOp(ST->getValue(), Lo, Hi); - return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, - ST->getPointerInfo(), - ST->getMemoryVT(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr, + ST->getMemoryVT(), ST->getMemOperand()); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index cd2f060..4255948 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -153,20 +153,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N, SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { // Sign-extend the new bits, and continue the assertion. SDValue Op = SExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::AssertSext, N->getDebugLoc(), + return DAG.getNode(ISD::AssertSext, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { // Zero the new bits, and continue the assertion. 
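// Integer promotion, which starts here, keeps a narrow value in a wider
// register; AssertSext/AssertZext record which extension the now-visible
// upper bits obey. The two carriers, modeled on the host:
#include <cstdint>
#include <cassert>

static int32_t promote_sext(int8_t v) { // AssertSext-style carrier
  return static_cast<int32_t>(v);
}
static int32_t promote_zext(int8_t v) { // AssertZext-style carrier
  return static_cast<int32_t>(static_cast<uint8_t>(v));
}

int main() {
  assert(promote_sext(-1) == -1);  // upper 24 bits replicate the sign
  assert(promote_zext(-1) == 255); // upper 24 bits are zero
  return 0;
}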
SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::AssertZext, N->getDebugLoc(), + return DAG.getNode(ISD::AssertZext, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), ResVT, N->getChain(), N->getBasePtr(), N->getMemOperand(), N->getOrdering(), @@ -179,7 +179,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); - SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), N->getOrdering(), @@ -193,7 +193,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); - SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getOrdering(), N->getSynchScope()); @@ -209,7 +209,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: @@ -264,7 +264,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), @@ -274,7 +274,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), JoinIntegers(N->getOperand(0), N->getOperand(1))); @@ -283,7 +283,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { EVT VT = N->getValueType(0); // FIXME there is no actual debug info here - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Zero extend things like i1, sign extend everything else. It shouldn't // matter in theory which one we pick, but this tends to give better code? unsigned Opc = VT.isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; @@ -301,7 +301,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && "can only promote integers"); EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0), + return DAG.getConvertRndSat(OutVT, SDLoc(N), N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), N->getOperand(4), CvtCode); } @@ -309,7 +309,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); @@ -322,14 +322,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op); + return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op); } SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->getOpcode() == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off @@ -342,7 +342,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), N->getOperand(1)); @@ -351,7 +351,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NewOpc = N->getOpcode(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT @@ -374,7 +374,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); @@ -384,7 +384,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (getTypeAction(N->getOperand(0).getValueType()) == TargetLowering::TypePromoteInteger) { @@ -415,11 +415,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? 
ISD::EXTLOAD : N->getExtensionType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), - N->getPointerInfo(), - N->getMemoryVT(), N->isVolatile(), - N->isNonTemporal(), N->getAlignment()); + N->getMemoryVT(), N->getMemOperand()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -433,7 +431,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); EVT ValueVTs[] = { N->getValueType(0), NVT }; SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; - SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs, 2), Ops, 2); // Modified the sum result - switch anything that used the old sum to use @@ -453,7 +451,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { SDValue RHS = SExtPromotedInteger(N->getOperand(1)); EVT OVT = N->getOperand(0).getValueType(); EVT NVT = LHS.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Do the arithmetic in the larger type. unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB; @@ -476,15 +474,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { // Sign extend the input. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), N->getOperand(0),LHS,RHS); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { @@ -492,23 +490,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. 
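// PromoteIntRes_SADDSUBO above sign-extends both inputs and does the add/sub
// in the wider type; the overflow test (elided by the diff context) amounts
// to checking that the wide result still sign-extends from the original
// width.  A scalar i16-in-i32 sketch, names illustrative:
#include <cstdint>
static bool SAdd16Overflow(int16_t A, int16_t B, int16_t &Res) {
  int32_t Wide = (int32_t)A + (int32_t)B; // arithmetic in the larger type
  Res = (int16_t)Wide;                    // truncate to the original type
  return Wide != (int32_t)Res;            // lost bits => signed overflow
}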
- Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy)); + Mask = PromoteTargetBoolean(Mask, getSetCCResultType(OpTy)); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); - return DAG.getNode(ISD::VSELECT, N->getDebugLoc(), + return DAG.getNode(ISD::VSELECT, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(2)); SDValue RHS = GetPromotedInteger(N->getOperand(3)); - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), N->getOperand(0), N->getOperand(1), LHS, RHS, N->getOperand(4)); } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + EVT SVT = getSetCCResultType(N->getOperand(0).getValueType()); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -517,13 +515,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { if (!TLI.isTypeLegal(SVT)) SVT = NVT; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && "Vector compare must return a vector result!"); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (LHS.getValueType() != RHS.getValueType()) { + if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger && + !LHS.getValueType().isVector()) + LHS = GetPromotedInteger(LHS); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger && + !RHS.getValueType().isVector()) + RHS = GetPromotedInteger(RHS); + } + // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), - N->getOperand(1), N->getOperand(2)); + SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS, + N->getOperand(2)); assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); // Convert to the expected type. @@ -534,12 +543,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { SDValue Res = GetPromotedInteger(N->getOperand(0)); SDValue Amt = N->getOperand(1); Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt); + return DAG.getNode(ISD::SHL, SDLoc(N), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } @@ -549,7 +558,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { // that too is okay if they are integer operations. SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = GetPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } @@ -558,7 +567,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue Res = SExtPromotedInteger(N->getOperand(0)); SDValue Amt = N->getOperand(1); Amt = Amt.getValueType().isVector() ? 
ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt); + return DAG.getNode(ISD::SRA, SDLoc(N), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { @@ -566,14 +575,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { SDValue Res = ZExtPromotedInteger(N->getOperand(0)); SDValue Amt = N->getOperand(1); Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt); + return DAG.getNode(ISD::SRL, SDLoc(N), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; SDValue InOp = N->getOperand(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (getTypeAction(InOp.getValueType())) { default: llvm_unreachable("Unknown type action!"); @@ -618,7 +627,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); EVT OVT = N->getOperand(0).getValueType(); EVT NVT = LHS.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Do the arithmetic in the larger type. unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; @@ -642,7 +651,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { return PromoteIntRes_Overflow(N); SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT SmallVT = LHS.getValueType(); // To determine if the result overflowed in a larger type, we extend the @@ -690,7 +699,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { // Zero extend the input. SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } @@ -703,7 +712,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Chain = N->getOperand(0); // Get the chain. SDValue Ptr = N->getOperand(1); // Get the pointer. EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); @@ -847,12 +856,12 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op); } SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); - return DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), + return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), N->getOrdering(), N->getSynchScope()); } @@ -881,7 +890,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "only know how to promote condition"); // Promote all the way up to the canonical SetCC type. 
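// PromoteIntRes_XMULO above widens both operands and multiplies in the larger
// type; the product overflowed iff bits above the original width survive.
// Unsigned i16-in-i32 sketch (illustrative, not LLVM code):
#include <cstdint>
static bool UMul16Overflow(uint16_t A, uint16_t B, uint16_t &Res) {
  uint32_t Wide = (uint32_t)A * (uint32_t)B; // multiply in the larger type
  Res = (uint16_t)Wide;
  return (Wide >> 16) != 0;                  // any high bits => overflow
}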
- EVT SVT = TLI.getSetCCResultType(MVT::Other); + EVT SVT = getSetCCResultType(MVT::Other); SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); // The chain (Op#0) and basic block destination (Op#2) are always legal types. @@ -895,7 +904,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); SDValue Hi = GetPromotedInteger(N->getOperand(1)); assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy())); @@ -908,7 +917,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // type does not have a strange size (eg: it is not i1). EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && + "Legal vector of one illegal element?"); // Promote the inserted value. The type does not need to match the // vector element type. Check that any extra bits introduced will be @@ -931,7 +941,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) && "can only promote integer arguments"); SDValue InOp = GetPromotedInteger(N->getOperand(0)); - return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp, + return DAG.getConvertRndSat(N->getValueType(0), SDLoc(N), InOp, N->getOperand(1), N->getOperand(2), N->getOperand(3), N->getOperand(4), CvtCode); } @@ -955,7 +965,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, assert(OpNo == 2 && "Different operand and result vector types?"); // Promote the index. - SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); + SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N), + TLI.getVectorIdxTy()); return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Idx), 0); } @@ -973,7 +984,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - EVT SVT = TLI.getSetCCResultType(N->getOpcode() == ISD::SELECT ? + EVT SVT = getSetCCResultType(N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy); Cond = PromoteTargetBoolean(Cond, SVT); @@ -1011,7 +1022,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, DAG.getValueType(N->getOperand(0).getValueType())); @@ -1025,22 +1036,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); - unsigned Alignment = N->getAlignment(); - bool isVolatile = N->isVolatile(); - bool isNonTemporal = N->isNonTemporal(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. // Truncate the value and store the result. 
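// PromoteIntOp_SIGN_EXTEND above any-extends to the wide type, then
// sign-extends "in register" from the original width.  For an i8 carried in
// an i32 that is the familiar shl/sar pair -- a sketch assuming arithmetic
// right shift on signed types, as on all mainstream targets:
#include <cstdint>
static int32_t SExtInRegFrom8(uint32_t AnyExt) {
  return (int32_t)(AnyExt << 24) >> 24; // keep low 8 bits, replicate the sign
}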
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(), - N->getMemoryVT(), - isVolatile, isNonTemporal, Alignment); + return DAG.getTruncStore(Ch, dl, Val, Ptr, + N->getMemoryVT(), N->getMemOperand()); } SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { @@ -1049,7 +1056,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = GetPromotedInteger(N->getOperand(0)); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); return DAG.getZeroExtendInReg(Op, dl, @@ -1127,7 +1134,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_LOAD_MAX: case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_LOAD_UMAX: - case ISD::ATOMIC_SWAP: { + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_CMP_SWAP: { std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N); SplitInteger(Tmp.first, Lo, Hi); ReplaceValueWith(SDValue(N, 1), Tmp.second); @@ -1180,6 +1188,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; } break; case ISD::ATOMIC_CMP_SWAP: @@ -1189,6 +1198,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; } break; case ISD::ATOMIC_LOAD_ADD: @@ -1198,6 +1208,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; } break; case ISD::ATOMIC_LOAD_SUB: @@ -1207,6 +1218,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; } break; case ISD::ATOMIC_LOAD_AND: @@ -1216,6 +1228,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; } break; case ISD::ATOMIC_LOAD_OR: @@ -1225,6 +1238,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; } break; case ISD::ATOMIC_LOAD_XOR: @@ -1234,6 +1248,7 @@ std::pair <SDValue, SDValue> 
DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; } break; case ISD::ATOMIC_LOAD_NAND: @@ -1243,6 +1258,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; } break; } @@ -1254,7 +1270,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { /// and the shift amount is a constant 'Amt'. Expand the operation. void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); @@ -1352,7 +1368,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned NVTBits = NVT.getScalarType().getSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; @@ -1439,7 +1455,7 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned NVTBits = NVT.getSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Get the incoming operand to be shifted. SDValue InL, InH; @@ -1448,7 +1464,7 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); - SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy), Amt, NVBitsNode, ISD::SETULT); SDValue LoS, HiS, LoL, HiL; @@ -1467,8 +1483,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { LoL = DAG.getConstant(0, NVT); // Lo part is zero. HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRL: // Short: ShAmt < NVTBits @@ -1483,8 +1499,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { HiL = DAG.getConstant(0, NVT); // Hi part is zero. LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRA: // Short: ShAmt < NVTBits @@ -1500,15 +1516,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getConstant(NVTBits-1, ShTy)); LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. 
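// ExpandShiftWithUnknownAmountBit above, rendered for a 64-bit SHL built from
// i32 halves.  The DAG computes both the short (Amt < 32) and long
// (Amt >= 32) results and picks one with getSelect; plain C++ must branch to
// keep the component shifts in range.  Sketch assuming 1 <= Amt <= 63:
#include <cstdint>
static void Shl64(uint32_t InL, uint32_t InH, unsigned Amt,
                  uint32_t &Lo, uint32_t &Hi) {
  if (Amt < 32) {                            // short: both halves shift,
    Lo = InL << Amt;                         // low spills into high
    Hi = (InH << Amt) | (InL >> (32 - Amt));
  } else {                                   // long: low part is zero,
    Lo = 0;                                  // high comes entirely from low
    Hi = InL << (Amt - 32);
  }
}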
- Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; } } void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; GetExpandedInteger(N->getOperand(0), LHSL, LHSH); @@ -1545,25 +1561,25 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); - SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); - SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); - SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], ISD::SETULT); - SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, - DAG.getConstant(1, NVT), Carry1); + SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); SDValue Cmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1572,7 +1588,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, SDValue &Lo, SDValue &Hi) { // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); @@ -1598,7 +1614,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, SDValue &Lo, SDValue &Hi) { // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); @@ -1623,7 +1639,7 @@ void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo, void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { // The low part is any extension of the input (which degenerates to a copy). 
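// ExpandIntRes_ADDSUB above, for targets without ADDC/ADDE: the carry out of
// the low half is recovered with an unsigned SETULT compare (the DAG version
// tests the sum against both addends; one compare suffices in scalar form).
// 64-bit add/sub from i32 halves, names illustrative:
#include <cstdint>
static void Add64(uint32_t AL, uint32_t AH, uint32_t BL, uint32_t BH,
                  uint32_t &Lo, uint32_t &Hi) {
  Lo = AL + BL;
  uint32_t Carry = Lo < AL ? 1 : 0;  // SETULT: low half wrapped
  Hi = AH + BH + Carry;
}
static void Sub64(uint32_t AL, uint32_t AH, uint32_t BL, uint32_t BH,
                  uint32_t &Lo, uint32_t &Hi) {
  Lo = AL - BL;
  uint32_t Borrow = AL < BL ? 1 : 0; // SETULT: low half underflowed
  Hi = AH - BH - Borrow;
}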
@@ -1645,7 +1661,7 @@ void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); @@ -1666,7 +1682,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); @@ -1686,7 +1702,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo); Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); @@ -1703,26 +1719,26 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); - SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, + SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi, DAG.getConstant(0, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); - Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, - DAG.getNode(ISD::ADD, dl, NVT, LoLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); + Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); Hi = DAG.getConstant(0, NVT); } void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); @@ -1733,42 +1749,44 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // cttz (HiLo) -> Lo != 0 ? 
cttz(Lo) : (cttz(Hi)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); - SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, + SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, DAG.getConstant(0, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); - Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, - DAG.getNode(ISD::ADD, dl, NVT, HiLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); + Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); Hi = DAG.getConstant(0, NVT); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, + dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, + dl).first, Lo, Hi); } @@ -1790,7 +1808,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); bool isInvariant = N->isInvariant(); - DebugLoc dl = N->getDebugLoc(); + const MDNode *TBAAInfo = N->getTBAAInfo(); + SDLoc dl(N); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1798,7 +1817,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), - MemVT, isVolatile, isNonTemporal, Alignment); + MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -1820,7 +1839,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1829,11 +1849,11 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. 
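// The ctlz/cttz expansions above as plain C++ on a value split into i32
// halves.  The DAG uses the ZERO_UNDEF variants where the operand is known
// nonzero; likewise this sketch requires a nonzero input, since
// __builtin_clz/ctz are undefined on 0.
#include <cstdint>
static unsigned Ctlz64(uint32_t Lo, uint32_t Hi) {
  return Hi != 0 ? __builtin_clz(Hi) : __builtin_clz(Lo) + 32;
}
static unsigned Cttz64(uint32_t Lo, uint32_t Hi) {
  return Lo != 0 ? __builtin_ctz(Lo) : __builtin_ctz(Hi) + 32;
}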
@@ -1851,17 +1871,17 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1889,7 +1909,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue LL, LH, RL, RH; GetExpandedInteger(N->getOperand(0), LL, LH); GetExpandedInteger(N->getOperand(1), RL, RH); @@ -1901,7 +1921,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); @@ -1984,7 +2004,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, + dl).first, Lo, Hi); } @@ -1992,7 +2013,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); // Expand the result by simply replacing it with the equivalent // non-overflow-checking operation. @@ -2033,7 +2054,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2047,13 +2068,13 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If we can emit an efficient shift operation, do so now. Check to see if // the RHS is a constant. 
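// ExpandIntRes_MUL above prefers MULHS/MULHU over the libcall fallback shown;
// the double-width product is then assembled from half-width pieces.  A
// 64x64->64 sketch from i32 halves, with uint64_t standing in for the
// mul/mulhu pair:
#include <cstdint>
static void Mul64(uint32_t AL, uint32_t AH, uint32_t BL, uint32_t BH,
                  uint32_t &Lo, uint32_t &Hi) {
  uint64_t LL = (uint64_t)AL * BL; // low x low at full width
  Lo = (uint32_t)LL;               // the mul part
  Hi = (uint32_t)(LL >> 32)        // the mulhu part
       + AL * BH + AH * BL;        // cross terms, wrapping mod 2^32
}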
@@ -2142,7 +2163,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo, + Hi); return; } @@ -2153,7 +2175,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { // The low part is sign extension of the input (degenerates to a copy). @@ -2183,7 +2205,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, void DAGTypeLegalizer:: ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); @@ -2211,7 +2233,7 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2225,13 +2247,13 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(), N->getOperand(0), @@ -2243,7 +2265,7 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand the result by simply replacing it with the equivalent // non-overflow-checking operation. @@ -2265,7 +2287,7 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // A divide for UMULO should be faster than a function call. if (N->getOpcode() == ISD::UMULO) { @@ -2276,16 +2298,16 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // A divide for UMULO will be faster than a function call. Select to // make sure we aren't using 0. 
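// The divide-based UMULO check implemented just below, in scalar form: after
// the truncating multiply, a zero divisor is replaced by 1 (the NotZero
// select), the product overflowed iff MUL / RHS != LHS, and a zero RHS is
// forced to "no overflow" at the end.  Sketch with illustrative names:
#include <cstdint>
static bool UMul64Overflow(uint64_t LHS, uint64_t RHS, uint64_t &Mul) {
  Mul = LHS * RHS;                       // wraps mod 2^64
  uint64_t NotZero = RHS == 0 ? 1 : RHS; // avoid dividing by zero
  bool Overflow = Mul / NotZero != LHS;
  return RHS == 0 ? false : Overflow;    // zero divisor can never overflow
}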
- SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), RHS, DAG.getConstant(0, VT), ISD::SETEQ); - SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, - DAG.getConstant(1, VT), RHS); + SDValue NotZero = DAG.getSelect(dl, VT, isZero, + DAG.getConstant(1, VT), RHS); SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, ISD::SETNE); - Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero, - DAG.getConstant(0, N->getValueType(1)), - Overflow); + Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, + DAG.getConstant(0, N->getValueType(1)), + Overflow); ReplaceValueWith(SDValue(N, 1), Overflow); return; } @@ -2293,7 +2315,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); EVT PtrVT = TLI.getPointerTy(); Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); - + // Replace this with a libcall that will check overflow. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i32) @@ -2351,7 +2373,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2365,13 +2387,13 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2385,13 +2407,13 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { // The low part is zero extension of the input (degenerates to a copy). @@ -2418,7 +2440,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); SDValue Zero = DAG.getConstant(0, VT); SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, @@ -2498,7 +2520,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - DebugLoc dl) { + SDLoc dl) { SDValue LHSLo, LHSHi, RHSLo, RHSHi; GetExpandedInteger(NewLHS, LHSLo, LHSHi); GetExpandedInteger(NewRHS, RHSLo, RHSHi); @@ -2555,16 +2577,16 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // this identity: (B1 ? 
B2 : B3) --> (B1 & B2)|(!B1&B3) TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL); SDValue Tmp1, Tmp2; - Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()), + Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); if (!Tmp1.getNode()) - Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC); - Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); if (!Tmp2.getNode()) Tmp2 = DAG.getNode(ISD::SETCC, dl, - TLI.getSetCCResultType(LHSHi.getValueType()), + getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, DAG.getCondCode(CCCode)); ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode()); @@ -2584,21 +2606,21 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } - NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()), + NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ, false, DagCombineInfo, dl); if (!NewLHS.getNode()) - NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ); - NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(), - NewLHS, Tmp1, Tmp2); + NewLHS = DAG.getSelect(dl, Tmp1.getValueType(), + NewLHS, Tmp1, Tmp2); NewRHS = SDValue(); } SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); - IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -2616,7 +2638,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); - IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -2634,7 +2656,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); - IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. 
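// IntegerExpandSetCCOperands above, for an ordered compare on expanded
// integers: compare the low halves with the unsigned form of the predicate,
// the high halves with the original predicate, and take the low result only
// when the high halves are equal.  Signed 64-bit SETLT from i32 halves:
#include <cstdint>
static bool SetLT64(uint32_t LL, int32_t LH, uint32_t RL, int32_t RH) {
  bool LowCC  = LL < RL; // unsigned compare on the low halves
  bool HighCC = LH < RH; // signed compare on the high halves
  return LH == RH ? LowCC : HighCC;
}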
if (NewRHS.getNode() == 0) { @@ -2672,7 +2694,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2689,7 +2711,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); - DebugLoc dl = N->getDebugLoc(); + const MDNode *TBAAInfo = N->getTBAAInfo(); + SDLoc dl(N); SDValue Lo, Hi; assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -2698,7 +2721,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), N->getMemoryVT(), isVolatile, isNonTemporal, - Alignment); + Alignment, TBAAInfo); } if (TLI.isLittleEndian()) { @@ -2706,7 +2729,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2715,11 +2738,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2747,17 +2770,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), - HiVT, isVolatile, isNonTemporal, Alignment); + HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2765,14 +2788,14 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); // Just truncate the low part of the source. 
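// The little-endian path of ExpandIntOp_STORE above: the low half goes at the
// original address, the high half IncrementSize (NVT bits / 8, i.e. 4 here)
// bytes further on.  Byte-level sketch, assuming a little-endian host:
#include <cstdint>
#include <cstring>
static void Store64(unsigned char *Ptr, uint32_t Lo, uint32_t Hi) {
  std::memcpy(Ptr, &Lo, 4);     // low bits at the low address
  std::memcpy(Ptr + 4, &Hi, 4); // high half at Ptr + IncrementSize
}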
- return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL); } SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); EVT SrcVT = Op.getValueType(); EVT DstVT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // The following optimization is valid only if every value in SrcVT (when // treated as signed) is representable in DstVT. Check that the mantissa @@ -2806,7 +2829,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Lo, Hi; GetExpandedInteger(Op, Lo, Hi); SDValue SignSet = DAG.getSetCC(dl, - TLI.getSetCCResultType(Hi.getValueType()), + getSetCCResultType(Hi.getValueType()), Hi, DAG.getConstant(0, Hi.getValueType()), ISD::SETLT); @@ -2819,10 +2842,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Zero = DAG.getIntPtrConstant(0); SDValue Four = DAG.getIntPtrConstant(4); if (TLI.isBigEndian()) std::swap(Zero, Four); - SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, - Zero, Four); + SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, + Zero, Four); unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); - FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); + FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(), + FudgePtr, Offset); Alignment = std::min(Alignment, 4u); // Load the value out, extending it from f32 to the destination float type. @@ -2839,11 +2863,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, cast<AtomicSDNode>(N)->getMemoryVT(), N->getOperand(0), @@ -2865,7 +2889,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned OutNumElems = OutVT.getVectorNumElements(); EVT NOutVTElem = NOutVT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue BaseIdx = N->getOperand(1); SmallVector<SDValue, 8> Ops; @@ -2874,7 +2898,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { // Extract the element from the original vector. 
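// ExpandIntOp_UINT_TO_FP above: convert as signed, and if the sign bit was
// set (SignSet) add back 2^SrcBits -- the DAG loads that fudge factor from a
// two-entry constant pool indexed by the 0-or-4 select.  Scalar sketch,
// assuming two's-complement reinterpretation and ignoring double-rounding
// corner cases:
#include <cstdint>
static double U64ToF64(uint64_t X) {
  double D = (double)(int64_t)X; // signed conversion of the same bits
  if ((int64_t)X < 0)            // SignSet: high half negative
    D += 18446744073709551616.0; // add the 2^64 fudge factor
  return D;
}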
SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), - BaseIdx, DAG.getIntPtrConstant(i)); + BaseIdx, DAG.getConstant(i, BaseIdx.getValueType())); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getVectorElementType(), N->getOperand(0), Index); @@ -2890,7 +2914,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); SmallVector<int, 8> NewMask; @@ -2913,12 +2937,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { unsigned NumElems = N->getNumOperands(); EVT NOutVTElem = NOutVT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SmallVector<SDValue, 8> Ops; Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { - SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + SDValue Op; + // BUILD_VECTOR integer operand types are allowed to be larger than the + // result's element type. This may still be true after the promotion. For + // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to + // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>. + if (N->getOperand(i).getValueType().bitsLT(NOutVTElem)) + Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + else + Op = N->getOperand(i); Ops.push_back(Op); } @@ -2927,7 +2959,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); assert(!N->getOperand(0).getValueType().isVector() && "Input must be a scalar"); @@ -2943,7 +2975,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); @@ -2964,7 +2996,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDValue Op = N->getOperand(i); for (unsigned j = 0; j < NumElem; ++j) { SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - InElemTy, Op, DAG.getIntPtrConstant(j)); + InElemTy, Op, DAG.getConstant(j, + TLI.getVectorIdxTy())); Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); } } @@ -2979,7 +3012,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { EVT NOutVTElem = NOutVT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl, @@ -2989,9 +3022,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); - SDValue V1 = N->getOperand(1); + SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl, TLI.getVectorIdxTy()); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, V0->getValueType(0).getScalarType(), V0, V1); @@ -3002,7 +3035,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumElems = N->getNumOperands(); 
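// The per-element pattern used by the vector promotions above
// (EXTRACT_SUBVECTOR, CONCAT_VECTORS): extract each element at BaseIdx + i,
// widen or truncate it, and rebuild the vector.  An array sketch of the
// extract + any_extend loop, names illustrative:
#include <cstdint>
static void ExtractSubvec(const uint8_t *In, unsigned BaseIdx,
                          uint16_t *Out, unsigned OutNumElems) {
  for (unsigned i = 0; i != OutNumElems; ++i)
    Out[i] = (uint16_t)In[BaseIdx + i]; // extract at BaseIdx+i, any_extend
}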
EVT RetSclrTy = N->getValueType(0).getVectorElementType(); @@ -3019,7 +3052,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; i<NumElem; ++i) { // Extract element from incoming vector SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, - Incoming, DAG.getIntPtrConstant(i)); + Incoming, DAG.getConstant(i, TLI.getVectorIdxTy())); SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); NewOps.push_back(Tr); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a7d5fb0..eb13230 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -738,9 +738,6 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { SDValue &OpEntry = PromotedIntegers[Op]; assert(OpEntry.getNode() == 0 && "Node is already promoted!"); OpEntry = Result; - - // Propagate node ordering - DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { @@ -752,9 +749,6 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { SDValue &OpEntry = SoftenedFloats[Op]; assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); OpEntry = Result; - - // Propagate node ordering - DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { @@ -769,9 +763,6 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = ScalarizedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); OpEntry = Result; - - // Propagate node ordering - DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, @@ -799,10 +790,6 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; - - // Propagate ordering - DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); - DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, @@ -830,10 +817,6 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; - - // Propagate ordering - DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); - DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, @@ -863,10 +846,6 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already split"); Entry.first = Lo; Entry.second = Hi; - - // Propagate ordering - DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); - DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { @@ -878,9 +857,6 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = WidenedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node already widened!"); OpEntry = Result; - - // Propagate node ordering - DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } @@ -891,7 +867,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue 
Result) { /// BitConvertToInteger - Convert to an integer of the same size. SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { unsigned BitWidth = Op.getValueType().getSizeInBits(); - return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(Op), EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } @@ -902,13 +878,13 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = Op.getValueType().getVectorNumElements(); - return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(Op), EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); } SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, EVT DestVT) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Create the stack frame object. Make sure it is aligned for both // the source and destination types. SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); @@ -948,8 +924,6 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { "Custom lowering returned the wrong number of results!"); for (unsigned i = 0, e = Results.size(); i != e; ++i) { ReplaceValueWith(SDValue(N, i), Results[i]); - // Propagate node ordering - DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N)); } return true; } @@ -984,25 +958,11 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) { return SDValue(N->getOperand(ResNo)); } -/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type -/// which is split into two not necessarily identical pieces. -void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { - // Currently all types are split in half. - if (!InVT.isVector()) { - LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - } else { - unsigned NumElements = InVT.getVectorNumElements(); - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), NumElements/2); - } -} - /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = Pair.getDebugLoc(); + SDLoc dl(Pair); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, DAG.getIntPtrConstant(0)); @@ -1012,12 +972,9 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index) { - DebugLoc dl = Index.getDebugLoc(); + SDLoc dl(Index); // Make sure the index type is big enough to compute in. - if (Index.getValueType().bitsGT(TLI.getPointerTy())) - Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index); - else - Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); + Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy()); // Calculate the element offset and add it to the pointer. unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. @@ -1029,9 +986,9 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, /// JoinIntegers - Build an integer with low bits Lo and high bits Hi. 
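The GetVectorElementPointer change just above is the pattern for the new getZExtOrTrunc convenience: the old code picked TRUNCATE or ZERO_EXTEND by hand depending on which type was wider. Its scalar semantics, as a runnable model (assuming the usual 64-bit-or-narrower widths):

#include <cassert>
#include <cstdint>

// Scalar model of SelectionDAG::getZExtOrTrunc: bring a value of FromBits
// to ToBits, zero-extending when growing and truncating when shrinking.
static uint64_t zextOrTrunc(uint64_t V, unsigned FromBits, unsigned ToBits) {
  uint64_t FromMask =
      FromBits >= 64 ? ~UINT64_C(0) : (UINT64_C(1) << FromBits) - 1;
  uint64_t ToMask = ToBits >= 64 ? ~UINT64_C(0) : (UINT64_C(1) << ToBits) - 1;
  V &= FromMask;     // only FromBits of the value are significant
  return V & ToMask; // truncation; zero-extension adds nothing
}

int main() {
  assert(zextOrTrunc(0xFFFF, 16, 64) == 0xFFFF);              // zext i16->i64
  assert(zextOrTrunc(0x123456789ULL, 64, 32) == 0x23456789);  // trunc i64->i32
  assert(zextOrTrunc(0xAB, 8, 8) == 0xAB);                    // no-op
  return 0;
}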
SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { - // Arbitrarily use dlHi for result DebugLoc - DebugLoc dlHi = Hi.getDebugLoc(); - DebugLoc dlLo = Lo.getDebugLoc(); + // Arbitrarily use dlHi for result SDLoc + SDLoc dlHi(Hi); + SDLoc dlLo(Lo); EVT LVT = Lo.getValueType(); EVT HVT = Hi.getValueType(); EVT NVT = EVT::getIntegerVT(*DAG.getContext(), @@ -1048,22 +1005,25 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned) { unsigned NumOps = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, + dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, + dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, + dl).first; } SmallVector<SDValue, 8> Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); return TLI.makeLibCall(DAG, LC, N->getValueType(0), - &Ops[0], NumOps, isSigned, dl); + &Ops[0], NumOps, isSigned, dl).first; } // ExpandChainLibCall - Expand a node into a call to a libcall. Similar to @@ -1093,7 +1053,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Node->getDebugLoc()); + Callee, Args, DAG, SDLoc(Node)); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo; @@ -1103,7 +1063,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. 
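Each LibCallify return gains a .first because makeLibCall in this release hands back a (result, chain) pair rather than the bare result. The usage pattern, with the same names as in the hunk above (callers that must thread the output chain would keep .second as well):

// makeLibCall now returns both values of the lowered call.
std::pair<SDValue, SDValue> Tmp =
    TLI.makeLibCall(DAG, LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl);
SDValue Result = Tmp.first;    // the call's return value
SDValue OutChain = Tmp.second; // the output chain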
SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { - DebugLoc dl = Bool.getDebugLoc(); + SDLoc dl(Bool); ISD::NodeType ExtendCode = TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector())); return DAG.getNode(ExtendCode, dl, VT, Bool); @@ -1114,7 +1074,7 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { void DAGTypeLegalizer::SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1c4274a..13bb08f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1,4 +1,4 @@ -//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===// +//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -73,6 +73,10 @@ private: return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(*DAG.getContext(), VT); + } + /// IgnoreNodeResults - Pretend all of this node's results are legal. bool IgnoreNodeResults(SDNode *N) const { return N->getOpcode() == ISD::TargetConstant; @@ -195,7 +199,7 @@ private: /// final size. SDValue SExtPromotedInteger(SDValue Op) { EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); Op = GetPromotedInteger(Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, DAG.getValueType(OldVT)); @@ -205,7 +209,7 @@ private: /// final size. SDValue ZExtPromotedInteger(SDValue Op) { EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); Op = GetPromotedInteger(Op); return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); } @@ -357,7 +361,7 @@ private: SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl); + ISD::CondCode &CCCode, SDLoc dl); //===--------------------------------------------------------------------===// // Float to Integer Conversion Support: LegalizeFloatTypes.cpp @@ -406,6 +410,7 @@ private: SDValue SoftenFloatRes_FPOWI(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FROUND(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); @@ -466,6 +471,7 @@ private: void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -476,6 +482,7 @@ private: // Float Operand Expansion. 
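SExtPromotedInteger re-sign-extends from the original width inside the promoted register, which is exactly what ISD::SIGN_EXTEND_INREG expresses; ZExtPromotedInteger is the zero-extend analogue. Runnable scalar models of both (assuming 1 <= FromBits < 64):

#include <cassert>
#include <cstdint>

// The value lives in a wider register, but only the low FromBits matter.
static uint64_t signExtendInReg(uint64_t V, unsigned FromBits) {
  uint64_t SignBit = UINT64_C(1) << (FromBits - 1);
  V &= (UINT64_C(1) << FromBits) - 1;
  return (V ^ SignBit) - SignBit; // classic sign-extension trick
}

static uint64_t zeroExtendInReg(uint64_t V, unsigned FromBits) {
  return V & ((UINT64_C(1) << FromBits) - 1);
}

int main() {
  // i8 0x80 promoted to i32: sext yields 0xFFFFFF80, zext yields 0x80.
  assert((uint32_t)signExtendInReg(0x80, 8) == 0xFFFFFF80u);
  assert(zeroExtendInReg(0x1FF, 8) == 0xFF);
  return 0;
}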
bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N); SDValue ExpandFloatOp_FP_ROUND(SDNode *N); SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); @@ -484,7 +491,7 @@ private: SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl); + ISD::CondCode &CCCode, SDLoc dl); //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp @@ -530,7 +537,7 @@ private: // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); - SDValue ScalarizeVecOp_EXTEND(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -554,6 +561,7 @@ private: void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -624,6 +632,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); + SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); @@ -649,7 +658,7 @@ private: /// loads to load a vector with a resulting wider type. It takes /// LdChain: list of chains for the load to be generated. /// Ld: load to widen - SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, + SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD); /// GenWidenVectorExtLoads - Helper function to generate a set of extension @@ -657,20 +666,20 @@ private: /// LdChain: list of chains for the load to be generated. /// Ld: load to widen /// ExtType: extension element type - SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, + SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of /// stores to store a widen vector into non widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST); + void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Helper genWidenVectorTruncStores - Helper function to generate a set of /// stores to store a truncate widen vector into non widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, + void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Modifies a vector input (widen or narrows) to a vector of NVT. The @@ -695,10 +704,6 @@ private: GetExpandedFloat(Op, Lo, Hi); } - /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type - /// which is split (or expanded) into two not necessarily identical pieces. 
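The GenWidenVector* signature changes swap SmallVector<SDValue, 16>& for SmallVectorImpl<SDValue>&, the size-erased base class, so every caller can pick its own inline capacity without a copy and the helper is no longer pinned to one N. A minimal illustration, compilable assuming the LLVM ADT headers are on the include path:

#include "llvm/ADT/SmallVector.h"

// Taking SmallVectorImpl<T>& lets one helper serve every inline capacity N.
static void appendSquares(llvm::SmallVectorImpl<int> &Out, int Count) {
  for (int i = 0; i < Count; ++i)
    Out.push_back(i * i);
}

int main() {
  llvm::SmallVector<int, 4> A;  // small inline buffer
  llvm::SmallVector<int, 64> B; // large inline buffer
  appendSquares(A, 3);          // both bind to SmallVectorImpl<int>&
  appendSquares(B, 10);
  return A.size() + B.size() == 13 ? 0 : 1;
}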
- void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT); - /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi); @@ -726,6 +731,12 @@ private: GetExpandedFloat(Op, Lo, Hi); } + + /// This function will split the integer \p Op into \p NumElements + /// operations of type \p EltVT and store them in \p Ops. + void IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl<SDValue> &Ops, EVT EltVT); + // Generic Result Expansion. void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 222d1c0..c749fde 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -41,7 +41,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Handle some special cases efficiently. switch (getTypeAction(InVT)) { @@ -77,12 +77,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeWidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - InVT.getVectorNumElements()/2); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); + llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); @@ -115,7 +112,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, - CastInOp, DAG.getIntPtrConstant(i))); + CastInOp, DAG.getConstant(i, + TLI.getVectorIdxTy()))); // Build Lo, Hi pair by pairing extracted elements if needed. unsigned Slot = 0; @@ -161,13 +159,14 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, + StackPtr.getValueType())); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, @@ -203,7 +202,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue OldVec = N->getOperand(0); unsigned OldElts = OldVec.getValueType().getVectorNumElements(); EVT OldEltVT = OldVec.getValueType().getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Convert to a vector of the expanded element type, for example // <3 x i64> -> <6 x i32>. 
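The llvm::tie calls appearing from here on unpack the new pair-returning SelectionDAG helpers (GetSplitDestVTs, SplitVector, SplitVectorOperand); llvm::tie is LLVM's pre-C++11 stand-in for what later became std::tie. A self-contained reimplementation of the idea, with the pair-returning split modelled by a toy function:

#include <cassert>
#include <utility>

// Minimal C++03-style tie, the same idea as llvm::tie in ADT/STLExtras.h:
// bind two lvalues so that assigning a std::pair writes through to them.
template <typename A, typename B> struct Tier {
  A &First;
  B &Second;
  Tier(A &F, B &S) : First(F), Second(S) {}
  Tier &operator=(const std::pair<A, B> &P) {
    First = P.first;
    Second = P.second;
    return *this;
  }
};
template <typename A, typename B> Tier<A, B> tie(A &F, B &S) {
  return Tier<A, B>(F, S);
}

static std::pair<int, int> splitInHalf(int N) {
  return std::make_pair(N / 2, N - N / 2);
}

int main() {
  int Lo, Hi;
  tie(Lo, Hi) = splitInHalf(9); // mirrors llvm::tie(LoVT, HiVT) = ...
  assert(Lo == 4 && Hi == 5);
  return 0;
}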
@@ -227,10 +226,6 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. SDValue Idx = N->getOperand(1); - // Make sure the type of Idx is big enough to hold the new values. - if (Idx.getValueType().bitsLT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); - Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); @@ -245,7 +240,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(ISD::isNormalLoad(N) && "This routine only for normal loads!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); LoadSDNode *LD = cast<LoadSDNode>(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); @@ -255,20 +250,22 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -289,7 +286,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Chain = N->getOperand(0); SDValue Ptr = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); const unsigned Align = N->getConstantOperandVal(3); Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); @@ -309,29 +306,54 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { // Generic Operand Expansion. //===--------------------------------------------------------------------===// +void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl<SDValue> &Ops, + EVT EltVT) { + assert(Op.getValueType().isInteger()); + SDLoc DL(Op); + SDValue Parts[2]; + + if (NumElements > 1) { + NumElements >>= 1; + SplitInteger(Op, Parts[0], Parts[1]); + if (TLI.isBigEndian()) + std::swap(Parts[0], Parts[1]); + IntegerToVector(Parts[0], NumElements, Ops, EltVT); + IntegerToVector(Parts[1], NumElements, Ops, EltVT); + } else { + Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op)); + } +} + SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->getValueType(0).isVector()) { // An illegal expanding type is being converted to a legal vector type. // Make a two element vector out of the expanded parts and convert that // instead, but only if the new vector type is legal (otherwise there // is no point, and it might create expansion loops). 
For example, on // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32. + // + // FIXME: I'm not sure why we are first trying to split the input into + // a 2 element vector, so I'm leaving it here to maintain the current + // behavior. + unsigned NumElts = 2; EVT OVT = N->getOperand(0).getValueType(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), - 2); + NumElts); + if (!isTypeLegal(NVT)) { + // If we can't find a legal type by splitting the integer in half, + // then we can use the node's value type. + NumElts = N->getValueType(0).getVectorNumElements(); + NVT = N->getValueType(0); + } - if (isTypeLegal(NVT)) { - SDValue Parts[2]; - GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]); + SmallVector<SDValue, 8> Ops; + IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - if (TLI.isBigEndian()) - std::swap(Parts[0], Parts[1]); - - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); - return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); - } + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts); + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } // Otherwise, store to a temporary and load out again as the new type. @@ -344,7 +366,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { unsigned NumElts = VecVT.getVectorNumElements(); EVT OldVT = N->getOperand(0).getValueType(); EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); assert(OldVT == VecVT.getVectorElementType() && "BUILD_VECTOR operand type doesn't match vector element type!"); @@ -382,7 +404,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { // The vector type is legal but the element type needs expansion. EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Val = N->getOperand(1); EVT OldEVT = Val.getValueType(); @@ -406,7 +428,8 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); Idx = DAG.getNode(ISD::ADD, dl, - Idx.getValueType(), Idx, DAG.getIntPtrConstant(1)); + Idx.getValueType(), Idx, + DAG.getConstant(1, Idx.getValueType())); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type. 
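IntegerToVector, used by the rewritten ExpandOp_BITCAST above, keeps halving the integer with SplitInteger until the pieces reach element width, recursing into the low half first (the halves are swapped up front on big-endian targets so lanes still come out in memory order). A runnable little-endian model, assuming the total width fits in 64 bits:

#include <cassert>
#include <cstdint>
#include <vector>

// Standalone model of DAGTypeLegalizer::IntegerToVector for a little-endian
// target: split Op into NumElements lanes of EltBits each, low half first.
// Assumes NumElements is a power of two and NumElements * EltBits <= 64.
static void integerToVector(uint64_t Op, unsigned NumElements,
                            unsigned EltBits, std::vector<uint64_t> &Ops) {
  if (NumElements > 1) {
    NumElements >>= 1;
    unsigned HalfBits = NumElements * EltBits;
    uint64_t Lo = Op & ((UINT64_C(1) << HalfBits) - 1); // SplitInteger: low
    uint64_t Hi = Op >> HalfBits;                       // ...and high part
    integerToVector(Lo, NumElements, EltBits, Ops); // low lanes come first
    integerToVector(Hi, NumElements, EltBits, Ops);
  } else {
    Ops.push_back(Op); // one element left: "bitcast" it into a lane
  }
}

int main() {
  std::vector<uint64_t> Lanes;
  integerToVector(0x1122334455667788ULL, 4, 16, Lanes); // i64 -> 4 x i16
  assert(Lanes.size() == 4);
  assert(Lanes[0] == 0x7788 && Lanes[1] == 0x5566 &&
         Lanes[2] == 0x3344 && Lanes[3] == 0x1122);
  return 0;
}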
@@ -414,7 +437,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   EVT VT = N->getValueType(0);
   assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
          "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
@@ -430,7 +453,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
 SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
   assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
   assert(OpNo == 1 && "Can only expand the stored value so far");
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   StoreSDNode *St = cast<StoreSDNode>(N);
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
@@ -440,6 +463,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
   unsigned Alignment = St->getAlignment();
   bool isVolatile = St->isVolatile();
   bool isNonTemporal = St->isNonTemporal();
+  const MDNode *TBAAInfo = St->getTBAAInfo();
 
   assert(NVT.isByteSized() && "Expanded type not byte sized!");
   unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -451,15 +475,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
     std::swap(Lo, Hi);
 
   Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
-                    isVolatile, isNonTemporal, Alignment);
+                    isVolatile, isNonTemporal, Alignment, TBAAInfo);
 
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                    DAG.getIntPtrConstant(IncrementSize));
-  assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+                    DAG.getConstant(IncrementSize, Ptr.getValueType()));
 
   Hi = DAG.getStore(Chain, dl, Hi, Ptr,
                     St->getPointerInfo().getWithOffset(IncrementSize),
                     isVolatile, isNonTemporal,
-                    MinAlign(Alignment, IncrementSize));
+                    MinAlign(Alignment, IncrementSize), TBAAInfo);
 
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
 }
@@ -483,21 +506,19 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
 void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
                                        SDValue &Hi) {
   SDValue LL, LH, RL, RH, CL, CH;
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   GetSplitOp(N->getOperand(1), LL, LH);
   GetSplitOp(N->getOperand(2), RL, RH);
 
   SDValue Cond = N->getOperand(0);
   CL = CH = Cond;
   if (Cond.getValueType().isVector()) {
-    assert(Cond.getValueType().getVectorElementType() == MVT::i1 &&
-           "Condition legalized before result?");
-    unsigned NumElements = Cond.getValueType().getVectorNumElements();
-    EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2);
-    CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
-                     DAG.getIntPtrConstant(0));
-    CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
-                     DAG.getIntPtrConstant(NumElements / 2));
+    // Check if there are already split versions of the vector available and
+    // use those instead of splitting the mask operand again.
+ if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Cond, CL, CH); + else + llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl); } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); @@ -507,7 +528,7 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitOp(N->getOperand(2), LL, LH); GetSplitOp(N->getOperand(3), RL, RH); @@ -519,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index c6e066e..2c3cdcc 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -171,7 +171,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: Changed = true; - return LegalizeOp(TLI.LowerOperation(Result, DAG)); + return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG)); case TargetLowering::Expand: Changed = true; return LegalizeOp(ExpandStore(Op)); @@ -227,6 +227,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_TO_UINT: case ISD::FNEG: case ISD::FABS: + case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: @@ -241,6 +242,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::FROUND: case ISD::FFLOOR: case ISD::FP_ROUND: case ISD::FP_EXTEND: @@ -320,7 +322,7 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); for (unsigned j = 0; j != Op.getNumOperands(); ++j) { @@ -357,7 +359,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { // Build a new vector type and check if it is legal. MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? 
ISD::ZERO_EXTEND : @@ -375,7 +377,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { SDValue VectorLegalizer::ExpandLoad(SDValue Op) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); SDValue Chain = LD->getChain(); SDValue BasePTR = LD->getBasePtr(); @@ -416,7 +418,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + LD->isInvariant(), LD->getAlignment(), + LD->getTBAAInfo()); } else { EVT LoadVT = WideVT; while (RemainingBytes < LoadBytes) { @@ -426,13 +429,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), LD->getAlignment(), + LD->getTBAAInfo()); } RemainingBytes -= LoadBytes; Offset += LoadBytes; BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(LoadBytes)); + DAG.getConstant(LoadBytes, BasePTR.getValueType())); LoadVals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -497,10 +501,10 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->getAlignment(), LD->getTBAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + DAG.getConstant(Stride, BasePTR.getValueType())); Vals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -519,7 +523,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { } SDValue VectorLegalizer::ExpandStore(SDValue Op) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); SDValue Chain = ST->getChain(); SDValue BasePTR = ST->getBasePtr(); @@ -529,6 +533,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); unsigned NumElem = StVT.getVectorNumElements(); // The type of the data we want to save @@ -551,15 +556,15 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Value, DAG.getIntPtrConstant(Idx)); + RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy())); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + DAG.getConstant(Stride, BasePTR.getValueType())); Stores.push_back(Store); } @@ -572,9 +577,9 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it - // using XOR AND OR. The selector bit is broadcasted. 
+  // using XOR AND OR. The selector bit is broadcast.
   EVT VT = Op.getValueType();
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
 
   SDValue Mask = Op.getOperand(0);
   SDValue Op1 = Op.getOperand(1);
@@ -597,15 +602,12 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
     return DAG.UnrollVectorOp(Op.getNode());
 
   // Generate a mask operand.
-  EVT MaskTy = TLI.getSetCCResultType(VT);
-  assert(MaskTy.isVector() && "Invalid CC type");
-  assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits()
-         && "Invalid mask size");
+  EVT MaskTy = VT.changeVectorElementTypeToInteger();
 
   // What is the size of each element in the vector mask.
   EVT BitTy = MaskTy.getScalarType();
 
-  Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask,
+  Mask = DAG.getSelect(DL, BitTy, Mask,
           DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy),
           DAG.getConstant(0, BitTy));
@@ -637,7 +639,7 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
       TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
     return DAG.UnrollVectorOp(Op.getNode());
 
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
   EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
 
   unsigned BW = VT.getScalarType().getSizeInBits();
@@ -652,13 +654,14 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
 SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
   // Implement VSELECT in terms of XOR, AND, OR
   // on platforms which do not support blend natively.
-  EVT VT = Op.getOperand(0).getValueType();
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
 
   SDValue Mask = Op.getOperand(0);
   SDValue Op1 = Op.getOperand(1);
   SDValue Op2 = Op.getOperand(2);
 
+  EVT VT = Mask.getValueType();
+
   // If we can't even use the basic vector operations of
   // AND,OR,XOR, we will have to scalarize the op.
   // Notice that the operation may be 'promoted' which means that it is
@@ -673,8 +676,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
       TargetLowering::ZeroOrNegativeOneBooleanContent)
     return DAG.UnrollVectorOp(Op.getNode());
 
-  assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits()
-         && "Invalid mask size");
+  // If the mask and the type are different sizes, unroll the vector op. This
+  // can occur when getSetCCResultType returns something that is different in
+  // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
+  if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits())
+    return DAG.UnrollVectorOp(Op.getNode());
+
   // Bitcast the operands to be the same type as the mask.
   // This is needed when we select between FP types because
   // the mask is a vector of integers.
@@ -693,7 +700,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
 SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
   EVT VT = Op.getOperand(0).getValueType();
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
 
   // Make sure that the SINT_TO_FP and SRL instructions are available.
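ExpandVSELECT's fallback relies on the identity result = (mask & a) | (~mask & b), with the NOT spelled as XOR against all-ones; the two early-outs it now enforces (the target must use ZeroOrNegativeOneBooleanContent, and mask lanes must match operand lanes in size) are exactly the preconditions of that identity. A per-lane scalar model:

#include <cassert>
#include <cstdint>

// Per-lane model of the XOR/AND/OR expansion of VSELECT: valid only when a
// "true" mask lane is all ones and mask lanes match data lanes in width.
static uint32_t vselectLane(uint32_t MaskLane, uint32_t A, uint32_t B) {
  uint32_t AllOnes = ~UINT32_C(0);
  return (MaskLane & A) | ((MaskLane ^ AllOnes) & B);
}

int main() {
  assert(vselectLane(0xFFFFFFFFu, 7, 9) == 7); // true lane selects A
  assert(vselectLane(0x00000000u, 7, 9) == 9); // false lane selects B
  // A 0/1-encoded mask blends bits instead of selecting a lane, which is
  // why ZeroOrOne boolean content must fall back to unrolling:
  assert(vselectLane(0x1u, 5, 2) == 3); // neither operand survives intact
  return 0;
}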
if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || @@ -734,7 +741,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), Zero, Op.getOperand(0)); } return DAG.UnrollVectorOp(Op.getNode()); @@ -746,19 +753,20 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { EVT EltVT = VT.getVectorElementType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); EVT TmpEltVT = LHS.getValueType().getVectorElementType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getIntPtrConstant(i)); - Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT), + DAG.getConstant(i, TLI.getVectorIdxTy())); + Ops[i] = DAG.getNode(ISD::SETCC, dl, + TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); - Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i], - DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), EltVT), - DAG.getConstant(0, EltVT)); + Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), EltVT), + DAG.getConstant(0, EltVT)); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 04c6bfd..f7a3e3d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -83,6 +83,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -97,6 +98,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::AND: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: @@ -128,7 +130,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } @@ -136,7 +138,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = GetScalarizedVector(N->getOperand(2)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, Op2); } @@ -148,7 +150,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + 
return DAG.getNode(ISD::BITCAST, SDLoc(N), NewVT, N->getOperand(0)); } @@ -158,14 +160,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { // The BUILD_VECTOR operands may be of wider element types and // we may need to truncate them back to the requested return type. if (EltVT.isInteger()) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); return InOp; } SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op0 = GetScalarizedVector(N->getOperand(0)); - return DAG.getConvertRndSat(NewVT, N->getDebugLoc(), + return DAG.getConvertRndSat(NewVT, SDLoc(N), Op0, DAG.getValueType(NewVT), DAG.getValueType(Op0.getValueType()), N->getOperand(3), @@ -174,7 +176,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { } SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0).getVectorElementType(), N->getOperand(0), N->getOperand(1)); } @@ -182,13 +184,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), NewVT, Op, N->getOperand(1)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FPOWI, N->getDebugLoc(), + return DAG.getNode(ISD::FPOWI, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } @@ -199,7 +201,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { EVT EltVT = N->getValueType(0).getVectorElementType(); if (Op.getValueType() != EltVT) // FIXME: Can this happen for floating point types? - Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op); + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op); return Op; } @@ -209,13 +211,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getExtensionType(), N->getValueType(0).getVectorElementType(), - N->getDebugLoc(), + SDLoc(N), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getOriginalAlignment()); + N->isInvariant(), N->getOriginalAlignment(), + N->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -227,14 +230,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. 
EVT DestVT = N->getValueType(0).getVectorElementType(); SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); } SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { EVT EltVT = N->getValueType(0).getVectorElementType(); EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType(); SDValue LHS = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT, + return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT, LHS, DAG.getValueType(ExtVT)); } @@ -244,7 +247,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { EVT EltVT = N->getValueType(0).getVectorElementType(); SDValue InOp = N->getOperand(0); if (InOp.getValueType() != EltVT) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); return InOp; } @@ -262,33 +265,34 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { assert(VecBool == TargetLowering::UndefinedBooleanContent || VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); // Vector read from all ones, scalar expects a single 1 so mask. - Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT, + Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, Cond, DAG.getConstant(1, CondVT)); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: assert(VecBool == TargetLowering::UndefinedBooleanContent || VecBool == TargetLowering::ZeroOrOneBooleanContent); // Vector reads from a one, scalar from all ones so sign extend. - Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT, + Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT, Cond, DAG.getValueType(MVT::i1)); break; } } - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), Cond, LHS, - GetScalarizedVector(N->getOperand(2))); + + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), N->getOperand(0), LHS, - GetScalarizedVector(N->getOperand(2))); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(2)); - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(), + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), N->getOperand(0), N->getOperand(1), LHS, GetScalarizedVector(N->getOperand(3)), N->getOperand(4)); @@ -303,7 +307,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Turn it into a scalar SETCC. return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); @@ -330,7 +334,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); EVT NVT = N->getValueType(0).getVectorElementType(); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Turn it into a scalar SETCC. 
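ScalarizeVecRes_VSELECT above has to reconcile two boolean encodings: a vector "true" that is all ones versus a scalar "true" that is exactly 1. The AND-with-1 and SIGN_EXTEND_INREG fixups convert between them; runnable scalar models of both directions:

#include <cassert>
#include <cstdint>

// Vector "true" = all ones; a ZeroOrOne-content scalar select wants 1.
static int32_t allOnesToZeroOrOne(int32_t Cond) { return Cond & 1; }

// Vector produced 0/1, but ZeroOrNegativeOne content wants 0 or -1.
// Negating the low bit is equivalent to SIGN_EXTEND_INREG from i1.
static int32_t zeroOrOneToAllOnes(int32_t Cond) { return -(Cond & 1); }

int main() {
  assert(allOnesToZeroOrOne(-1) == 1);
  assert(zeroOrOneToAllOnes(1) == -1);
  assert(zeroOrOneToAllOnes(0) == 0);
  return 0;
}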
 SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
@@ -368,7 +372,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::ANY_EXTEND:
     case ISD::ZERO_EXTEND:
     case ISD::SIGN_EXTEND:
-      Res = ScalarizeVecOp_EXTEND(N);
+    case ISD::TRUNCATE:
+      Res = ScalarizeVecOp_UnaryOp(N);
       break;
     case ISD::CONCAT_VECTORS:
       Res = ScalarizeVecOp_CONCAT_VECTORS(N);
@@ -401,22 +406,22 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
 /// to be scalarized, it must be <1 x ty>. Convert the element instead.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
   SDValue Elt = GetScalarizedVector(N->getOperand(0));
-  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+  return DAG.getNode(ISD::BITCAST, SDLoc(N),
                      N->getValueType(0), Elt);
 }
 
 /// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
 /// to be scalarized, it must be <1 x ty>. Extend the element instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
   assert(N->getValueType(0).getVectorNumElements() == 1 &&
          "Unexpected vector type!");
   SDValue Elt = GetScalarizedVector(N->getOperand(0));
   SmallVector<SDValue, 1> Ops(1);
-  Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+  Ops[0] = DAG.getNode(N->getOpcode(), SDLoc(N),
                        N->getValueType(0).getScalarType(), Elt);
   // Revectorize the result so the types line up with what the uses of this
   // expression expect.
-  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0),
                      &Ops[0], 1);
 }
 
@@ -426,7 +431,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
   SmallVector<SDValue, 8> Ops(N->getNumOperands());
   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
     Ops[i] = GetScalarizedVector(N->getOperand(i));
-  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0),
                      &Ops[0], Ops.size());
 }
 
@@ -436,7 +441,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
   SDValue Res = GetScalarizedVector(N->getOperand(0));
   if (Res.getValueType() != N->getValueType(0))
-    Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0),
+    Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0),
                       Res);
   return Res;
 }
 
@@ -446,7 +451,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
   assert(N->isUnindexed() && "Indexed store of one-element vector?");
   assert(OpNo == 1 && "Do not know how to scalarize this operand!");
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   if (N->isTruncatingStore())
     return DAG.getTruncStore(N->getChain(), dl,
@@ -454,12 +459,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
                              N->getBasePtr(), N->getPointerInfo(),
                              N->getMemoryVT().getVectorElementType(),
                              N->isVolatile(), N->isNonTemporal(),
-                             N->getAlignment());
+                             N->getAlignment(), N->getTBAAInfo());
 
   return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
                       N->getBasePtr(), N->getPointerInfo(),
                       N->isVolatile(), N->isNonTemporal(),
-                      N->getOriginalAlignment());
+                      N->getOriginalAlignment(), N->getTBAAInfo());
 }
 
@@ -516,7 +521,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; - case ISD::ANY_EXTEND: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: @@ -539,21 +543,27 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: - case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: case ISD::TRUNCATE: case ISD::UINT_TO_FP: - case ISD::ZERO_EXTEND: SplitVecRes_UnaryOp(N, Lo, Hi); break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + SplitVecRes_ExtendOp(N, Lo, Hi); + break; + case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: case ISD::SDIV: @@ -587,7 +597,7 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDValue RHSLo, RHSHi; GetSplitVector(N->getOperand(1), RHSLo, RHSHi); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); @@ -601,7 +611,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); SDValue Op2Lo, Op2Hi; GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo); @@ -614,8 +624,8 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // We know the result is a vector. The input may be either a vector or a // scalar value. EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - DebugLoc dl = N->getDebugLoc(); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + SDLoc dl(N); SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); @@ -668,8 +678,8 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + SDLoc dl(N); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); @@ -681,7 +691,7 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumSubvectors = N->getNumOperands() / 2; if (NumSubvectors == 1) { Lo = N->getOperand(0); @@ -690,7 +700,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, } EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); @@ -703,20 +713,21 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), 
LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements())); + DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), + TLI.getVectorIdxTy())); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitVector(N->getOperand(0), Lo, Hi); Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); @@ -726,10 +737,11 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT LoVT, HiVT; - GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = + DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, DAG.getValueType(LoVT)); @@ -742,7 +754,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue Vec = N->getOperand(0); SDValue Elt = N->getOperand(1); SDValue Idx = N->getOperand(2); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitVector(Vec, Lo, Hi); if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) { @@ -753,7 +765,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo.getValueType(), Lo, Elt, Idx); else Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getIntPtrConstant(IdxVal - LoNumElts)); + DAG.getConstant(IdxVal - LoNumElts, + TLI.getVectorIdxTy())); return; } @@ -780,7 +793,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, StackPtr.getValueType())); // Load the Hi part from the stack slot. 
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -790,8 +803,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + SDLoc dl(N); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); Hi = DAG.getUNDEF(HiVT); } @@ -800,8 +813,8 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); EVT LoVT, HiVT; - DebugLoc dl = LD->getDebugLoc(); - GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); + SDLoc dl(LD); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Ch = LD->getChain(); @@ -812,20 +825,22 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT LoMemVT, HiMemVT; - GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - isInvariant, Alignment); + isInvariant, Alignment, TBAAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment); + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -843,23 +858,13 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { "Operand types must be vectors"); EVT LoVT, HiVT; - DebugLoc DL = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + SDLoc DL(N); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the input. - EVT InVT = N->getOperand(0).getValueType(); SDValue LL, LH, RL, RH; - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); - LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - - RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getIntPtrConstant(0)); - RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -869,22 +874,16 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi) { // Get the dest types - they may not match the input types, e.g. int_to_fp. 
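When SplitVecRes_LOAD manufactures the high-half load above, it can no longer assume the original alignment: the second half sits IncrementSize bytes in, so its alignment is MinAlign(Alignment, IncrementSize), the largest power of two dividing both. A runnable check using the same formula as llvm/Support/MathExtras.h:

#include <cassert>
#include <cstdint>

// MinAlign as in llvm/Support/MathExtras.h: the greatest power of two
// dividing both A and B (i.e. the lowest set bit of A | B).
static uint64_t MinAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  // Splitting a 16-byte, 16-aligned v4i32 load: the high half lives at
  // +8 bytes, so it can only be assumed 8-byte aligned.
  uint64_t Alignment = 16, IncrementSize = 8; // LoMemVT v2i32 -> 8 bytes
  assert(MinAlign(Alignment, IncrementSize) == 8);
  // An already-weaker alignment is simply preserved.
  assert(MinAlign(4, 8) == 4);
  return 0;
}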
   EVT LoVT, HiVT;
-  DebugLoc dl = N->getDebugLoc();
-  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+  SDLoc dl(N);
+  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
 
   // If the input also splits, handle it directly for a compile time speedup.
   // Otherwise split it by hand.
   EVT InVT = N->getOperand(0).getValueType();
-  if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+  if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
     GetSplitVector(N->getOperand(0), Lo, Hi);
-  } else {
-    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
-                                 LoVT.getVectorNumElements());
-    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
-                     DAG.getIntPtrConstant(0));
-    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
-                     DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
-  }
+  else
+    llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
 
   if (N->getOpcode() == ISD::FP_ROUND) {
     Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
@@ -907,11 +906,63 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
   }
 }
 
+void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  SDLoc dl(N);
+  EVT SrcVT = N->getOperand(0).getValueType();
+  EVT DestVT = N->getValueType(0);
+  EVT LoVT, HiVT;
+  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
+
+  // We can do better than a generic split operation if the extend is doing
+  // more than just doubling the width of the elements and the following are
+  // true:
+  //   - The number of vector elements is even,
+  //   - the source type is legal,
+  //   - the type of a split source is illegal,
+  //   - the type of an extended (by doubling element size) source is legal, and
+  //   - the type of that extended source when split is legal.
+  //
+  // This won't necessarily completely legalize the operation, but it will
+  // more effectively move in the right direction and prevent falling down
+  // to scalarization in many cases due to the input vector being split too
+  // far.
+  unsigned NumElements = SrcVT.getVectorNumElements();
+  if ((NumElements & 1) == 0 &&
+      SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
+    LLVMContext &Ctx = *DAG.getContext();
+    EVT NewSrcVT = EVT::getVectorVT(
+        Ctx, EVT::getIntegerVT(
+                 Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2),
+        NumElements);
+    EVT SplitSrcVT =
+        EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
+    EVT SplitLoVT, SplitHiVT;
+    llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
+    if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
+        TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
+      DEBUG(dbgs() << "Split vector extend via incremental extend:";
+            N->dump(&DAG); dbgs() << "\n");
+      // Extend the source vector by one step.
+      SDValue NewSrc =
+          DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
+      // Get the low and high halves of the new, extended one step, vector.
+      llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+      // Extend those vector halves the rest of the way.
+      Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+      Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+      return;
+    }
+  }
+  // Fall back to the generic unary operator splitting otherwise.
+  SplitVecRes_UnaryOp(N, Lo, Hi);
+}
+
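The condition list in the new SplitVecRes_ExtendOp is easier to see with concrete numbers. Below is a standalone model of the "incremental extend" for a hypothetical v8i8 -> v8i64 extend (the widths are illustrative only and not tied to any real target): one doubling step keeps the full element count, and only then is the vector split and extended the rest of the way, instead of repeatedly splitting the original down toward scalars.

#include <cstdio>

// Standalone model of the incremental extend above for a hypothetical
// v8i8 -> v8i64 case. None of this touches real LLVM types.
int main() {
  unsigned NumElements = 8, SrcBits = 8, DestBits = 64;
  unsigned NewSrcBits = SrcBits * 2;  // NewSrcVT doubles the element width
  std::printf("step 1: v%ui%u -> v%ui%u\n", NumElements, SrcBits,
              NumElements, NewSrcBits);
  std::printf("split : 2 x v%ui%u\n", NumElements / 2, NewSrcBits);
  std::printf("step 2: 2 x v%ui%u -> 2 x v%ui%u\n", NumElements / 2,
              NewSrcBits, NumElements / 2, DestBits);
}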
 void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
                                                   SDValue &Lo, SDValue &Hi) {
   // The low and high parts of the original input give four input vectors.
   SDValue Inputs[4];
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
   GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
   EVT NewVT = Inputs[0].getValueType();
@@ -994,7 +1045,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
 
       // Extract the vector element by hand.
       SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
-                                  Inputs[Input], DAG.getIntPtrConstant(Idx)));
+                                  Inputs[Input], DAG.getConstant(Idx,
+                                                 TLI.getVectorIdxTy())));
     }
 
     // Construct the Lo/Hi output using a BUILD_VECTOR.
@@ -1030,6 +1082,10 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
         dbgs() << "\n");
   SDValue Res = SDValue();
 
+  // See if the target wants to custom split this node.
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+    return false;
+
   if (Res.getNode() == 0) {
     switch (N->getOpcode()) {
     default:
@@ -1094,41 +1150,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
   SDValue Mask = N->getOperand(0);
   SDValue Src0 = N->getOperand(1);
   SDValue Src1 = N->getOperand(2);
-  DebugLoc DL = N->getDebugLoc();
-  EVT MaskVT = Mask.getValueType();
-  assert(MaskVT.isVector() && "VSELECT without a vector mask?");
+  EVT Src0VT = Src0.getValueType();
+  SDLoc DL(N);
+  assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?");
 
   SDValue Lo, Hi;
   GetSplitVector(N->getOperand(0), Lo, Hi);
   assert(Lo.getValueType() == Hi.getValueType() &&
-         "Lo and Hi have differing types");;
-
-  unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
-  unsigned HiNumElts = Hi.getValueType().getVectorNumElements();
-  assert(LoNumElts == HiNumElts && "Asymmetric vector split?");
-
-  LLVMContext &Ctx = *DAG.getContext();
-  SDValue Zero = DAG.getIntPtrConstant(0);
-  SDValue LoElts = DAG.getIntPtrConstant(LoNumElts);
-  EVT Src0VT = Src0.getValueType();
-  EVT Src0EltTy = Src0VT.getVectorElementType();
-  EVT MaskEltTy = MaskVT.getVectorElementType();
-
-  EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts);
-  EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts);
-  EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts);
-  EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts);
-
-  SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero);
-  SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero);
+         "Lo and Hi have differing types");
 
-  SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts);
-  SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts);
+  EVT LoOpVT, HiOpVT;
+  llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
+  assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
 
-  SDValue LoMask =
-    DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero);
-  SDValue HiMask =
-    DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts);
+  SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
+  llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
+  llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
+  llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
 
   SDValue LoSelect =
     DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
@@ -1142,7 +1180,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
   // The result has a legal vector type, but the input needs splitting.
   EVT ResVT = N->getValueType(0);
   SDValue Lo, Hi;
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   GetSplitVector(N->getOperand(0), Lo, Hi);
   EVT InVT = Lo.getValueType();
 
@@ -1167,7 +1205,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
   if (TLI.isBigEndian())
     std::swap(Lo, Hi);
 
-  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+  return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
                      JoinIntegers(Lo, Hi));
 }
 
@@ -1175,7 +1213,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
   // We know that the extracted result type is legal.
   EVT SubVT = N->getValueType(0);
   SDValue Idx = N->getOperand(1);
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   SDValue Lo, Hi;
   GetSplitVector(N->getOperand(0), Lo, Hi);
 
@@ -1215,7 +1253,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
 
   // Store the vector to the stack.
   EVT EltVT = VecVT.getVectorElementType();
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
                                MachinePointerInfo(), false, false, 0);
@@ -1229,7 +1267,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
 SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
   assert(N->isUnindexed() && "Indexed store of vector?");
   assert(OpNo == 1 && "Can only split the stored value");
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
 
   bool isTruncating = N->isTruncatingStore();
   SDValue Ch  = N->getChain();
@@ -1238,39 +1276,40 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
   unsigned Alignment = N->getOriginalAlignment();
   bool isVol = N->isVolatile();
   bool isNT = N->isNonTemporal();
+  const MDNode *TBAAInfo = N->getTBAAInfo();
   SDValue Lo, Hi;
   GetSplitVector(N->getOperand(1), Lo, Hi);
 
   EVT LoMemVT, HiMemVT;
-  GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+  llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
 
   unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
 
   if (isTruncating)
     Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
-                           LoMemVT, isVol, isNT, Alignment);
+                           LoMemVT, isVol, isNT, Alignment, TBAAInfo);
   else
     Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
-                      isVol, isNT, Alignment);
+                      isVol, isNT, Alignment, TBAAInfo);
 
   // Increment the pointer to the other half.
   Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
-                    DAG.getIntPtrConstant(IncrementSize));
+                    DAG.getConstant(IncrementSize, Ptr.getValueType()));
 
   if (isTruncating)
     Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
                            N->getPointerInfo().getWithOffset(IncrementSize),
-                           HiMemVT, isVol, isNT, Alignment);
+                           HiMemVT, isVol, isNT, Alignment, TBAAInfo);
   else
     Hi = DAG.getStore(Ch, DL, Hi, Ptr,
                       N->getPointerInfo().getWithOffset(IncrementSize),
-                      isVol, isNT, Alignment);
+                      isVol, isNT, Alignment, TBAAInfo);
 
   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
 }
 
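SplitVecOp_STORE above writes the two halves at addresses separated by the byte width of the low half (the IncrementSize computation), now carrying the TBAA metadata through both halves. A minimal standalone model of just that address arithmetic, using plain memory rather than SelectionDAG nodes:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // A v4i32 value split into Lo/Hi halves, as SplitVecOp_STORE does.
  uint32_t Lo[2] = {1, 2}, Hi[2] = {3, 4};
  unsigned char Mem[16];
  unsigned IncrementSize = sizeof(Lo);  // LoMemVT.getSizeInBits() / 8
  std::memcpy(Mem, Lo, sizeof(Lo));                  // store Lo at Ptr
  std::memcpy(Mem + IncrementSize, Hi, sizeof(Hi));  // store Hi at Ptr + 8
  uint32_t Elt2;
  std::memcpy(&Elt2, Mem + 8, sizeof(Elt2));
  std::printf("%u\n", Elt2);  // prints "3": element 2 landed after Lo
}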
 SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
 
   // The input operands all must have the same type, and we know the result
   // type is valid.  Convert this to a buildvector which extracts all the
@@ -1284,7 +1323,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
     for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
          i != e; ++i) {
       Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
-                                 Op, DAG.getIntPtrConstant(i)));
+                                 Op, DAG.getConstant(i, TLI.getVectorIdxTy())));
     }
   }
 
@@ -1327,15 +1366,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
   // to split more than once.
   if (InElementSize <= OutElementSize * 2)
     return SplitVecOp_UnaryOp(N);
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
 
   // Extract the halves of the input via extract_subvector.
-  EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
-                                 InVT.getVectorElementType(), NumElements/2);
-  SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
-                                DAG.getIntPtrConstant(0));
-  SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
-                                DAG.getIntPtrConstant(NumElements/2));
+  SDValue InLoVec, InHiVec;
+  llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
   // Truncate them to 1/2 the element size.
   EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
@@ -1359,7 +1394,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
          "Operand types must be vectors");
   // The result has a legal vector type, but the input needs splitting.
   SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
   GetSplitVector(N->getOperand(0), Lo0, Hi0);
   GetSplitVector(N->getOperand(1), Lo1, Hi1);
   unsigned PartElements = Lo0.getValueType().getVectorNumElements();
@@ -1377,7 +1412,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
   // The result has a legal vector type, but the input needs splitting.
   EVT ResVT = N->getValueType(0);
   SDValue Lo, Hi;
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
   GetSplitVector(N->getOperand(0), Lo, Hi);
   EVT InVT = Lo.getValueType();
 
@@ -1434,27 +1469,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VECTOR_SHUFFLE:
     Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
     break;
+  case ISD::ADD:
   case ISD::AND:
   case ISD::BSWAP:
+  case ISD::MUL:
+  case ISD::MULHS:
+  case ISD::MULHU:
+  case ISD::OR:
+  case ISD::SUB:
+  case ISD::XOR:
+    Res = WidenVecRes_Binary(N);
+    break;
+
   case ISD::FADD:
   case ISD::FCOPYSIGN:
-  case ISD::FDIV:
   case ISD::FMUL:
   case ISD::FPOW:
-  case ISD::FREM:
   case ISD::FSUB:
-  case ISD::MUL:
-  case ISD::MULHS:
-  case ISD::MULHU:
-  case ISD::OR:
+  case ISD::FDIV:
+  case ISD::FREM:
   case ISD::SDIV:
-  case ISD::SREM:
   case ISD::UDIV:
+  case ISD::SREM:
   case ISD::UREM:
-  case ISD::SUB:
-  case ISD::XOR:
-    Res = WidenVecRes_Binary(N);
+    Res = WidenVecRes_BinaryCanTrap(N);
     break;
 
   case ISD::FPOWI:
@@ -1495,6 +1534,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FNEARBYINT:
   case ISD::FNEG:
   case ISD::FRINT:
+  case ISD::FROUND:
   case ISD::FSIN:
   case ISD::FSQRT:
   case ISD::FTRUNC:
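The reshuffled case list above routes the division and remainder opcodes to a new WidenVecRes_BinaryCanTrap helper instead of blindly widening them. The reason is visible in a small standalone model: widening fills the extra lanes with undefined values, which is harmless for ADD or AND but can fault for division if a padding lane happens to hold zero.

#include <cstdio>

int main() {
  // Three live lanes widened to four; the pad lane holds an arbitrary
  // "undef" value, here pessimistically zero.
  int Num[4] = {8, 9, 10, /*undef*/ 0};
  int Den[4] = {2, 3, 5, /*undef*/ 0};
  // A widened ADD over all four lanes would be safe; a widened SDIV is
  // not, because Num[3] / Den[3] divides by zero. So, as in
  // WidenVecRes_BinaryCanTrap, only the live lanes are divided.
  for (int i = 0; i != 3; ++i)
    std::printf("%d\n", Num[i] / Den[i]);  // prints 4, 3, 2
}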
@@ -1512,7 +1552,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
 
 SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
   // Ternary op widening.
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue InOp1 = GetWidenedVector(N->getOperand(0));
   SDValue InOp2 = GetWidenedVector(N->getOperand(1));
@@ -1522,8 +1562,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
 
 SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
   // Binary op widening.
+  SDLoc dl(N);
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
+  // Binary op widening for operations that can trap.
   unsigned Opcode = N->getOpcode();
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   EVT WidenEltVT = WidenVT.getVectorElementType();
   EVT VT = WidenVT;
@@ -1562,9 +1611,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
   while (CurNumElts != 0) {
     while (CurNumElts >= NumElts) {
       SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
-                                 DAG.getIntPtrConstant(Idx));
+                                 DAG.getConstant(Idx, TLI.getVectorIdxTy()));
       SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
-                                 DAG.getIntPtrConstant(Idx));
+                                 DAG.getConstant(Idx, TLI.getVectorIdxTy()));
       ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
       Idx += NumElts;
       CurNumElts -= NumElts;
@@ -1577,9 +1626,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
     if (NumElts == 1) {
       for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
         SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
-                                   InOp1, DAG.getIntPtrConstant(Idx));
+                                   InOp1, DAG.getConstant(Idx,
+                                                          TLI.getVectorIdxTy()));
         SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
-                                   InOp2, DAG.getIntPtrConstant(Idx));
+                                   InOp2, DAG.getConstant(Idx,
+                                                          TLI.getVectorIdxTy()));
         ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
                                              EOp1, EOp2);
       }
@@ -1617,7 +1668,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
       unsigned NumToInsert = ConcatEnd - Idx - 1;
       for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
         VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
-                            ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+                            ConcatOps[OpIdx], DAG.getConstant(i,
+                                                TLI.getVectorIdxTy()));
       }
       ConcatOps[Idx+1] = VecOp;
       ConcatEnd = Idx + 2;
@@ -1659,7 +1711,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
 
 SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
   SDValue InOp = N->getOperand(0);
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   unsigned WidenNumElts = WidenVT.getVectorNumElements();
 
@@ -1705,7 +1757,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
 
     if (InVTNumElts % WidenNumElts == 0) {
       SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
-                                  InOp, DAG.getIntPtrConstant(0));
+                                  InOp, DAG.getConstant(0,
+                                                        TLI.getVectorIdxTy()));
       // Extract the input and convert the shorten input vector.
       if (N->getNumOperands() == 1)
         return DAG.getNode(Opcode, DL, WidenVT, InVal);
@@ -1720,7 +1773,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
   unsigned i;
   for (i=0; i < MinElts; ++i) {
     SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
-                              DAG.getIntPtrConstant(i));
+                              DAG.getConstant(i, TLI.getVectorIdxTy()));
     if (N->getNumOperands() == 1)
       Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
     else
@@ -1738,7 +1791,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue InOp = GetWidenedVector(N->getOperand(0));
   SDValue ShOp = N->getOperand(1);
-  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+  return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
@@ -1757,14 +1810,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
   if (ShVT != ShWidenVT)
     ShOp = ModifyToType(ShOp, ShWidenVT);
 
-  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+  return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
   // Unary op widening.
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue InOp = GetWidenedVector(N->getOperand(0));
-  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+  return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
@@ -1774,7 +1827,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
                                .getVectorElementType(),
                                WidenVT.getVectorNumElements());
   SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
-  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+  return DAG.getNode(N->getOpcode(), SDLoc(N),
                      WidenVT, WidenLHS, DAG.getValueType(ExtVT));
 }
 
@@ -1788,7 +1841,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
   EVT InVT = InOp.getValueType();
   EVT VT = N->getValueType(0);
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   switch (getTypeAction(InVT)) {
   case TargetLowering::TypeLegal:
@@ -1868,19 +1921,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   // Build a vector with undefined for the new nodes.
   EVT VT = N->getValueType(0);
-  EVT EltVT = VT.getVectorElementType();
+
+  // Integer BUILD_VECTOR operands may be larger than the node's vector element
+  // type. The UNDEFs need to have the same type as the existing operands.
+  EVT EltVT = N->getOperand(0).getValueType();
   unsigned NumElts = VT.getVectorNumElements();
 
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
   unsigned WidenNumElts = WidenVT.getVectorNumElements();
 
   SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
-  NewOps.reserve(WidenNumElts);
-  for (unsigned i = NumElts; i < WidenNumElts; ++i)
-    NewOps.push_back(DAG.getUNDEF(EltVT));
+  assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
+  NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
 
   return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
 }
@@ -1888,7 +1943,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
 SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
   EVT InVT = N->getOperand(0).getValueType();
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   unsigned WidenNumElts = WidenVT.getVectorNumElements();
   unsigned NumInElts = InVT.getVectorNumElements();
   unsigned NumOperands = N->getNumOperands();
@@ -1946,7 +2001,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
       InOp = GetWidenedVector(InOp);
     for (unsigned j=0; j < NumInElts; ++j)
       Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
-                               DAG.getIntPtrConstant(j));
+                               DAG.getConstant(j, TLI.getVectorIdxTy()));
   }
   SDValue UndefVal = DAG.getUNDEF(EltVT);
   for (; Idx < WidenNumElts; ++Idx)
@@ -1955,7 +2010,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   SDValue InOp = N->getOperand(0);
   SDValue RndOp = N->getOperand(3);
   SDValue SatOp = N->getOperand(4);
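The BUILD_VECTOR change above does two things: it picks the UNDEF type from an existing operand (which may be wider than the vector element type) and replaces the reserve/push_back loop with a single fill-append. A standalone model of the padding step, with std::vector standing in for SmallVector:

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Ops = {1, 2, 3};  // NumElts = 3 existing operands
  const unsigned WidenNumElts = 8;
  const int Undef = -1;              // stand-in for DAG.getUNDEF(EltVT)
  // Fill-insert plays the role of SmallVector::append(count, value).
  Ops.insert(Ops.end(), WidenNumElts - Ops.size(), Undef);
  for (int V : Ops)
    std::printf("%d ", V);           // prints "1 2 3 -1 -1 -1 -1 -1"
  std::printf("\n");
}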
@@ -2004,7 +2059,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
     if (InVTNumElts % WidenNumElts == 0) {
       // Extract the input and convert the shorten input vector.
       InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
-                         DAG.getIntPtrConstant(0));
+                         DAG.getConstant(0, TLI.getVectorIdxTy()));
       return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp,
                                   RndOp, SatOp, CvtCode);
     }
@@ -2020,7 +2075,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
   unsigned i;
   for (i=0; i < MinElts; ++i) {
     SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
-                                 DAG.getIntPtrConstant(i));
+                                 DAG.getConstant(i, TLI.getVectorIdxTy()));
     Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
                                   SatOp, CvtCode);
   }
@@ -2038,7 +2093,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
   unsigned WidenNumElts = WidenVT.getVectorNumElements();
   SDValue InOp = N->getOperand(0);
   SDValue Idx  = N->getOperand(1);
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
     InOp = GetWidenedVector(InOp);
@@ -2063,7 +2118,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
   unsigned i;
   for (i=0; i < NumElts; ++i)
     Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
-                         DAG.getIntPtrConstant(IdxVal+i));
+                         DAG.getConstant(IdxVal+i, TLI.getVectorIdxTy()));
 
   SDValue UndefVal = DAG.getUNDEF(EltVT);
   for (; i < WidenNumElts; ++i)
@@ -2073,7 +2128,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
 
 SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
   SDValue InOp = GetWidenedVector(N->getOperand(0));
-  return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
                      InOp.getValueType(), InOp,
                      N->getOperand(1), N->getOperand(2));
 }
@@ -2096,7 +2151,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
   if (LdChain.size() == 1)
     NewChain = LdChain[0];
   else
-    NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+    NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                            &LdChain[0], LdChain.size());
 
   // Modified the chain - switch anything that used the old chain to use
@@ -2108,7 +2163,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
 
 SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N),
                      WidenVT, N->getOperand(0));
 }
 
@@ -2132,14 +2187,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
   SDValue InOp1 = GetWidenedVector(N->getOperand(1));
   SDValue InOp2 = GetWidenedVector(N->getOperand(2));
   assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
-  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+  return DAG.getNode(N->getOpcode(), SDLoc(N),
                      WidenVT, Cond1, InOp1, InOp2);
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
   SDValue InOp1 = GetWidenedVector(N->getOperand(2));
   SDValue InOp2 = GetWidenedVector(N->getOperand(3));
-  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+  return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
                      InOp1.getValueType(), N->getOperand(0),
                      N->getOperand(1), InOp1, InOp2, N->getOperand(4));
 }
@@ -2153,7 +2208,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue InOp1 = GetWidenedVector(N->getOperand(0));
   SDValue InOp2 = GetWidenedVector(N->getOperand(1));
-  return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT,
+  return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT,
                      InOp1, InOp2, N->getOperand(2));
 }
 
@@ -2164,7 +2219,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
 
 SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
   EVT VT = N->getValueType(0);
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
   unsigned NumElts = VT.getVectorNumElements();
@@ -2208,7 +2263,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
          InOp2.getValueType() == WidenInVT &&
          "Input not widened to expected type!");
   (void)WidenInVT;
-  return DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+  return DAG.getNode(ISD::SETCC, SDLoc(N),
                      WidenVT, InOp1, InOp2, N->getOperand(2));
 }
 
@@ -2277,7 +2332,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
   // into some scalar code and create a nasty build vector.
   EVT VT = N->getValueType(0);
   EVT EltVT = VT.getVectorElementType();
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   unsigned NumElts = VT.getVectorNumElements();
   SDValue InOp = N->getOperand(0);
   if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
@@ -2290,7 +2345,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
   for (unsigned i=0; i < NumElts; ++i)
     Ops[i] = DAG.getNode(Opcode, dl, EltVT,
                          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
-                                     DAG.getIntPtrConstant(i)));
+                                     DAG.getConstant(i, TLI.getVectorIdxTy())));
 
   return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
 }
@@ -2299,7 +2354,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDValue InOp = GetWidenedVector(N->getOperand(0));
   EVT InWidenVT = InOp.getValueType();
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   // Check if we can convert between two legal vector types and extract.
   unsigned InWidenSize = InWidenVT.getSizeInBits();
@@ -2311,7 +2366,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
     if (TLI.isTypeLegal(NewVT)) {
       SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
-                         DAG.getIntPtrConstant(0));
+                         DAG.getConstant(0, TLI.getVectorIdxTy()));
     }
   }
 
@@ -2324,7 +2379,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
   // nasty build vector.
   EVT VT = N->getValueType(0);
   EVT EltVT = VT.getVectorElementType();
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   unsigned NumElts = VT.getVectorNumElements();
   SmallVector<SDValue, 16> Ops(NumElts);
 
@@ -2339,20 +2394,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
       InOp = GetWidenedVector(InOp);
     for (unsigned j=0; j < NumInElts; ++j)
       Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
-                               DAG.getIntPtrConstant(j));
+                               DAG.getConstant(j, TLI.getVectorIdxTy()));
   }
   return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
 }
 
 SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
   SDValue InOp = GetWidenedVector(N->getOperand(0));
-  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(),
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
                      N->getValueType(0), InOp, N->getOperand(1));
 }
 
 SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
   SDValue InOp = GetWidenedVector(N->getOperand(0));
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
                      N->getValueType(0), InOp, N->getOperand(1));
 }
 
@@ -2370,14 +2425,14 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
   if (StChain.size() == 1)
     return StChain[0];
   else
-    return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),
+    return DAG.getNode(ISD::TokenFactor, SDLoc(ST),
                        MVT::Other,&StChain[0],StChain.size());
 }
 
 SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
   SDValue InOp0 = GetWidenedVector(N->getOperand(0));
   SDValue InOp1 = GetWidenedVector(N->getOperand(1));
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   // WARNING: In this code we widen the compare instruction with garbage.
   // This garbage may contain denormal floats which may be slow. Is this a real
@@ -2385,8 +2440,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
 
   // Get a new SETCC node to compare the newly widened operands.
   // Only some of the compared elements are legal.
-  EVT SVT = TLI.getSetCCResultType(InOp0.getValueType());
-  SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+  EVT SVT = TLI.getSetCCResultType(*DAG.getContext(), InOp0.getValueType());
+  SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
                                   SVT, InOp0, InOp1, N->getOperand(2));
 
   // Extract the needed results from the result vector.
@@ -2394,7 +2449,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
                                SVT.getVectorElementType(),
                                N->getValueType(0).getVectorNumElements());
   SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
-                           ResVT, WideSETCC, DAG.getIntPtrConstant(0));
+                           ResVT, WideSETCC, DAG.getConstant(0,
+                                             TLI.getVectorIdxTy()));
 
   return PromoteTargetBoolean(CC, N->getValueType(0));
 }
@@ -2465,9 +2521,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
 //  LDOps:    Load operators to build a vector type
 //  [Start,End) the list of loads to use.
 static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
-                                     SmallVector<SDValue, 16>& LdOps,
+                                     SmallVectorImpl<SDValue> &LdOps,
                                      unsigned Start, unsigned End) {
-  DebugLoc dl = LdOps[Start].getDebugLoc();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc dl(LdOps[Start]);
   EVT LdTy = LdOps[Start].getValueType();
   unsigned Width = VecTy.getSizeInBits();
   unsigned NumElts = Width / LdTy.getSizeInBits();
@@ -2487,12 +2544,12 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
       LdTy = NewLdTy;
     }
     VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
-                        DAG.getIntPtrConstant(Idx++));
+                        DAG.getConstant(Idx++, TLI.getVectorIdxTy()));
   }
   return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
 }
 
-SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
                                               LoadSDNode *LD) {
   // The strategy assumes that we can efficiently load powers of two widths.
   // The routines chops the vector into the largest vector loads with the same
@@ -2501,7 +2558,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
   unsigned WidenWidth = WidenVT.getSizeInBits();
   EVT LdVT    = LD->getMemoryVT();
-  DebugLoc dl = LD->getDebugLoc();
+  SDLoc dl(LD);
   assert(LdVT.isVector() && WidenVT.isVector());
   assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
@@ -2512,6 +2569,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
   bool isVolatile = LD->isVolatile();
   bool isNonTemporal = LD->isNonTemporal();
   bool isInvariant = LD->isInvariant();
+  const MDNode *TBAAInfo = LD->getTBAAInfo();
 
   int LdWidth = LdVT.getSizeInBits();
   int WidthDiff = WidenWidth - LdWidth;          // Difference
@@ -2521,7 +2579,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
   EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
   int NewVTWidth = NewVT.getSizeInBits();
   SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
-                             isVolatile, isNonTemporal, isInvariant, Align);
+                             isVolatile, isNonTemporal, isInvariant, Align,
+                             TBAAInfo);
   LdChain.push_back(LdOp.getValue(1));
 
   // Check if we can load the element with one instruction
@@ -2557,7 +2616,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
     unsigned Increment = NewVTWidth / 8;
     Offset += Increment;
     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
-                          DAG.getIntPtrConstant(Increment));
+                          DAG.getConstant(Increment, BasePtr.getValueType()));
 
     SDValue L;
     if (LdWidth < NewVTWidth) {
@@ -2566,7 +2625,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
       NewVTWidth = NewVT.getSizeInBits();
       L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
                       LD->getPointerInfo().getWithOffset(Offset), isVolatile,
-                      isNonTemporal, isInvariant, MinAlign(Align, Increment));
+                      isNonTemporal, isInvariant, MinAlign(Align, Increment),
+                      TBAAInfo);
       LdChain.push_back(L.getValue(1));
       if (L->getValueType(0).isVector()) {
         SmallVector<SDValue, 16> Loads;
@@ -2582,7 +2642,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
     } else {
       L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
                       LD->getPointerInfo().getWithOffset(Offset), isVolatile,
-                      isNonTemporal, isInvariant, MinAlign(Align, Increment));
+                      isNonTemporal, isInvariant, MinAlign(Align, Increment),
+                      TBAAInfo);
       LdChain.push_back(L.getValue(1));
     }
 
@@ -2646,14 +2707,14 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
 }
 
 SDValue
-DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
-                                         LoadSDNode * LD,
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
+                                         LoadSDNode *LD,
                                          ISD::LoadExtType ExtType) {
   // For extension loads, it may not be more efficient to chop up the vector
   // and then extended it.  Instead, we unroll the load and build a new vector.
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
   EVT LdVT    = LD->getMemoryVT();
-  DebugLoc dl = LD->getDebugLoc();
+  SDLoc dl(LD);
   assert(LdVT.isVector() && WidenVT.isVector());
 
   // Load information
@@ -2662,6 +2723,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
   unsigned Align    = LD->getAlignment();
   bool     isVolatile = LD->isVolatile();
   bool     isNonTemporal = LD->isNonTemporal();
+  const MDNode *TBAAInfo = LD->getTBAAInfo();
 
   EVT EltVT = WidenVT.getVectorElementType();
   EVT LdEltVT = LdVT.getVectorElementType();
@@ -2673,15 +2735,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
   unsigned Increment = LdEltVT.getSizeInBits() / 8;
   Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
                           LD->getPointerInfo(),
-                          LdEltVT, isVolatile, isNonTemporal, Align);
+                          LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo);
   LdChain.push_back(Ops[0].getValue(1));
   unsigned i = 0, Offset = Increment;
   for (i=1; i < NumElts; ++i, Offset += Increment) {
     SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
-                                     BasePtr, DAG.getIntPtrConstant(Offset));
+                                     BasePtr,
+                                     DAG.getConstant(Offset,
+                                                     BasePtr.getValueType()));
     Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
                             LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
-                            isVolatile, isNonTemporal, Align);
+                            isVolatile, isNonTemporal, Align, TBAAInfo);
     LdChain.push_back(Ops[i].getValue(1));
   }
 
@@ -2694,7 +2758,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
 }
 
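GenWidenVectorLoads above, and GenWidenVectorStores below, both follow the power-of-two chopping strategy their comments describe: cover the memory width with the largest legal accesses first, then progressively smaller ones. A standalone sketch of just the width arithmetic, under the assumption that every power-of-two access from 128 down to 8 bits is legal (real targets consult FindMemType instead):

#include <cstdio>

int main() {
  int LdWidth = 96;  // e.g. a v3i32 load: 96 bits to cover
  for (int W = 128; LdWidth > 0;) {
    while (W > LdWidth && W > 8)
      W /= 2;                          // largest piece that still fits
    std::printf("load %d bits\n", W);  // prints "64" then "32"
    LdWidth -= W;
  }
}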
-void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
                                             StoreSDNode *ST) {
   // The strategy assumes that we can efficiently store powers of two widths.
   // The routines chops the vector into the largest vector stores with the same
@@ -2704,8 +2768,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
   unsigned Align = ST->getAlignment();
   bool     isVolatile = ST->isVolatile();
   bool     isNonTemporal = ST->isNonTemporal();
+  const MDNode *TBAAInfo = ST->getTBAAInfo();
   SDValue  ValOp = GetWidenedVector(ST->getValue());
-  DebugLoc dl = ST->getDebugLoc();
+  SDLoc dl(ST);
 
   EVT StVT = ST->getMemoryVT();
   unsigned StWidth = StVT.getSizeInBits();
@@ -2726,16 +2791,16 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
     unsigned NumVTElts = NewVT.getVectorNumElements();
     do {
       SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
-                                DAG.getIntPtrConstant(Idx));
+                                DAG.getConstant(Idx, TLI.getVectorIdxTy()));
       StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
                                     ST->getPointerInfo().getWithOffset(Offset),
                                      isVolatile, isNonTemporal,
-                                     MinAlign(Align, Offset)));
+                                     MinAlign(Align, Offset), TBAAInfo));
       StWidth -= NewVTWidth;
       Offset += Increment;
       Idx += NumVTElts;
       BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
-                            DAG.getIntPtrConstant(Increment));
+                            DAG.getConstant(Increment, BasePtr.getValueType()));
     } while (StWidth != 0 && StWidth >= NewVTWidth);
   } else {
     // Cast the vector to the scalar type we can store
@@ -2746,15 +2811,15 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
     Idx = Idx * ValEltWidth / NewVTWidth;
     do {
       SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
-                                DAG.getIntPtrConstant(Idx++));
+                                DAG.getConstant(Idx++, TLI.getVectorIdxTy()));
       StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
                                     ST->getPointerInfo().getWithOffset(Offset),
                                      isVolatile, isNonTemporal,
-                                     MinAlign(Align, Offset)));
+                                     MinAlign(Align, Offset), TBAAInfo));
       StWidth -= NewVTWidth;
      Offset += Increment;
       BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
-                            DAG.getIntPtrConstant(Increment));
+                            DAG.getConstant(Increment, BasePtr.getValueType()));
     } while (StWidth != 0 && StWidth >= NewVTWidth);
     // Restore index back to be relative to the original widen element type
     Idx = Idx * NewVTWidth / ValEltWidth;
@@ -2763,7 +2828,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
 }
 
 void
-DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
                                             StoreSDNode *ST) {
   // For extension loads, it may not be more efficient to truncate the vector
   // and then store it.  Instead, we extract each element and then store it.
@@ -2772,8 +2837,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
   unsigned Align = ST->getAlignment();
   bool     isVolatile = ST->isVolatile();
   bool     isNonTemporal = ST->isNonTemporal();
+  const MDNode *TBAAInfo = ST->getTBAAInfo();
   SDValue  ValOp = GetWidenedVector(ST->getValue());
-  DebugLoc dl = ST->getDebugLoc();
+  SDLoc dl(ST);
 
   EVT StVT = ST->getMemoryVT();
   EVT ValVT = ValOp.getValueType();
@@ -2791,20 +2857,22 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
   unsigned Increment = ValEltVT.getSizeInBits() / 8;
   unsigned NumElts = StVT.getVectorNumElements();
   SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
-                            DAG.getIntPtrConstant(0));
+                            DAG.getConstant(0, TLI.getVectorIdxTy()));
   StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
                                       ST->getPointerInfo(), StEltVT,
-                                      isVolatile, isNonTemporal, Align));
+                                      isVolatile, isNonTemporal, Align,
+                                      TBAAInfo));
   unsigned Offset = Increment;
   for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
     SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
-                                     BasePtr, DAG.getIntPtrConstant(Offset));
+                                     BasePtr, DAG.getConstant(Offset,
+                                                     BasePtr.getValueType()));
     SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
-                              DAG.getIntPtrConstant(0));
+                              DAG.getConstant(0, TLI.getVectorIdxTy()));
     StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
                                       ST->getPointerInfo().getWithOffset(Offset),
                                         StEltVT, isVolatile, isNonTemporal,
-                                        MinAlign(Align, Offset)));
+                                        MinAlign(Align, Offset), TBAAInfo));
   }
 }
 
@@ -2816,7 +2884,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
   EVT InVT = InOp.getValueType();
   assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
          "input and widen element type must match");
-  DebugLoc dl = InOp.getDebugLoc();
+  SDLoc dl(InOp);
 
   // Check if InOp already has the right width.
   if (InVT == NVT)
@@ -2837,7 +2905,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
 
   if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
-                       DAG.getIntPtrConstant(0));
+                       DAG.getConstant(0, TLI.getVectorIdxTy()));
 
   // Fall back to extract and build.
   SmallVector<SDValue, 16> Ops(WidenNumElts);
@@ -2846,7 +2914,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
   unsigned Idx;
   for (Idx = 0; Idx < MinNumElts; ++Idx)
     Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
-                           DAG.getIntPtrConstant(Idx));
+                           DAG.getConstant(Idx, TLI.getVectorIdxTy()));
 
   SDValue UndefVal = DAG.getUNDEF(EltVT);
   for ( ; Idx < WidenNumElts; ++Idx)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 473e138..1dd2128 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -42,11 +42,11 @@ static cl::opt<signed> RegPressureThreshold(
 
 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
   Picker(this),
-  InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData())
+  InstrItins(IS->getTargetLowering()->getTargetMachine().getInstrItineraryData())
 {
-  TII = IS->getTargetLowering().getTargetMachine().getInstrInfo();
-  TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo();
-  TLI = &IS->getTargetLowering();
+  TII = IS->getTargetLowering()->getTargetMachine().getInstrInfo();
+  TRI = IS->getTargetLowering()->getTargetMachine().getRegisterInfo();
+  TLI = IS->getTargetLowering();
 
   const TargetMachine &tm = (*IS->MF).getTarget();
   ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
@@ -389,10 +389,9 @@ signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
 // Constants used to denote relative importance of
 // heuristic components for cost computation.
 static const unsigned PriorityOne = 200;
-static const unsigned PriorityTwo = 100;
-static const unsigned PriorityThree = 50;
-static const unsigned PriorityFour = 15;
-static const unsigned PriorityFive = 5;
+static const unsigned PriorityTwo = 50;
+static const unsigned PriorityThree = 15;
+static const unsigned PriorityFour = 5;
 static const unsigned ScaleOne = 20;
 static const unsigned ScaleTwo = 10;
 static const unsigned ScaleThree = 5;
@@ -449,7 +448,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
     if (N->isMachineOpcode()) {
       const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
       if (TID.isCall())
-        ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+        ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
     }
     else
       switch (N->getOpcode()) {
@@ -457,11 +456,11 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
       case ISD::TokenFactor:
      case ISD::CopyFromReg:
       case ISD::CopyToReg:
-        ResCount += PriorityFive;
+        ResCount += PriorityFour;
         break;
 
       case ISD::INLINEASM:
-        ResCount += PriorityFour;
+        ResCount += PriorityThree;
         break;
       }
   }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
deleted file mode 100644
index 7e7b897..0000000
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the SDNodeOrdering class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SDNODEORDERING_H
-#define LLVM_CODEGEN_SDNODEORDERING_H
-
-#include "llvm/ADT/DenseMap.h"
-
-namespace llvm {
-
-class SDNode;
-
-/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each
-/// SDNode that roughly corresponds to the ordering of the original LLVM
-/// instruction. This is used for turning off scheduling, because we'll forgo
-/// the normal scheduling algorithms and output the instructions according to
-/// this ordering.
-class SDNodeOrdering {
-  DenseMap<const SDNode*, unsigned> OrderMap;
-
-  void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
-  SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
-public:
-  SDNodeOrdering() {}
-
-  void add(const SDNode *Node, unsigned NewOrder) {
-    unsigned &OldOrder = OrderMap[Node];
-    if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder))
-      OldOrder = NewOrder;
-  }
-  void remove(const SDNode *Node) {
-    DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
-    if (Itr != OrderMap.end())
-      OrderMap.erase(Itr);
-  }
-  void clear() {
-    OrderMap.clear();
-  }
-  unsigned getOrder(const SDNode *Node) {
-    return OrderMap[Node];
-  }
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index d1f36cb..6c5e0ab 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -102,8 +102,8 @@ private:
     void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                   const TargetRegisterClass*,
                                   const TargetRegisterClass*,
-                                  SmallVector<SUnit*, 2>&);
-    bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+                                  SmallVectorImpl<SUnit*>&);
+    bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
     void ListScheduleBottomUp();
 
     /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
@@ -387,7 +387,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
 void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
-                                              SmallVector<SUnit*, 2> &Copies) {
+                                              SmallVectorImpl<SUnit*> &Copies) {
   SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL));
   CopyFromSU->CopySrcRC = SrcRC;
   CopyFromSU->CopyDstRC = DestRC;
@@ -448,7 +448,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
 static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                                std::vector<SUnit*> &LiveRegDefs,
                                SmallSet<unsigned, 4> &RegAdded,
-                               SmallVector<unsigned, 4> &LRegs,
+                               SmallVectorImpl<unsigned> &LRegs,
                                const TargetRegisterInfo *TRI) {
   bool Added = false;
   for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
@@ -467,7 +467,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
 /// If the specific node is the last one that's available to schedule, do
 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
 bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
-                                              SmallVector<unsigned, 4> &LRegs){
+                                               SmallVectorImpl<unsigned> &LRegs){
   if (NumLiveRegs == 0)
     return false;
 
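Many of the scheduler changes in this commit are the same mechanical API cleanup: parameters declared as SmallVector<T, N>& become SmallVectorImpl<T>&, the size-independent base class, so a single callee serves callers with any inline capacity. A minimal sketch of the pattern; it assumes the real llvm/ADT/SmallVector.h header is available to compile against:

#include <cstdio>
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Taking SmallVectorImpl<unsigned>& (not SmallVector<unsigned, 4>&)
// means the inline capacity is not baked into the signature.
static void fill(SmallVectorImpl<unsigned> &Out) {
  for (unsigned i = 0; i != 3; ++i)
    Out.push_back(i);
}

int main() {
  SmallVector<unsigned, 4> A;   // different inline sizes,
  SmallVector<unsigned, 16> B;  // same callee works for both
  fill(A);
  fill(B);
  std::printf("%u %u\n", (unsigned)A.size(), (unsigned)B.size());  // 3 3
}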
@@ -567,7 +567,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
         // "expensive to copy" values to break the dependency. In case even
         // that doesn't work, insert cross class copies.
         SUnit *TrySU = NotReady[0];
-        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+        SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
         assert(LRegs.size() == 1 && "Can't handle this yet!");
         unsigned Reg = LRegs[0];
         SUnit *LRDef = LiveRegDefs[Reg];
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index c009cfc..1a562d7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -229,8 +229,8 @@ private:
   void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                 const TargetRegisterClass*,
                                 const TargetRegisterClass*,
-                                SmallVector<SUnit*, 2>&);
-  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+                                SmallVectorImpl<SUnit*>&);
+  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
 
   void releaseInterferences(unsigned Reg = 0);
 
@@ -718,7 +718,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
   // indicate the scheduled cycle.
   SU->setHeightToAtLeast(CurCycle);
 
-  // Reserve resources for the scheduled intruction.
+  // Reserve resources for the scheduled instruction.
   EmitNode(SU);
 
   Sequence.push_back(SU);
@@ -1133,9 +1133,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
 /// scheduled successors of the given SUnit to the last copy.
 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
-                                               const TargetRegisterClass *DestRC,
-                                               const TargetRegisterClass *SrcRC,
-                                               SmallVector<SUnit*, 2> &Copies) {
+                                                 const TargetRegisterClass *DestRC,
+                                                 const TargetRegisterClass *SrcRC,
+                                                 SmallVectorImpl<SUnit*> &Copies) {
   SUnit *CopyFromSU = CreateNewSUnit(NULL);
   CopyFromSU->CopySrcRC = SrcRC;
   CopyFromSU->CopyDstRC = DestRC;
@@ -1205,7 +1205,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                                std::vector<SUnit*> &LiveRegDefs,
                                SmallSet<unsigned, 4> &RegAdded,
-                               SmallVector<unsigned, 4> &LRegs,
+                               SmallVectorImpl<unsigned> &LRegs,
                                const TargetRegisterInfo *TRI) {
   for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
 
@@ -1227,7 +1227,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                      std::vector<SUnit*> &LiveRegDefs,
                                      SmallSet<unsigned, 4> &RegAdded,
-                                     SmallVector<unsigned, 4> &LRegs) {
+                                     SmallVectorImpl<unsigned> &LRegs) {
   // Look at all live registers. Skip Reg0 and the special CallResource.
   for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
     if (!LiveRegDefs[i]) continue;
@@ -1252,7 +1252,7 @@ static const uint32_t *getNodeRegMask(const SDNode *N) {
 /// If the specific node is the last one that's available to schedule, do
 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
 bool ScheduleDAGRRList::
-DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
   if (NumLiveRegs == 0)
     return false;
 
@@ -1331,7 +1331,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
     SUnit *SU = Interferences[i-1];
     LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
     if (Reg) {
-      SmallVector<unsigned, 4> &LRegs = LRegsPos->second;
+      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
      if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
         continue;
     }
@@ -1385,7 +1385,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
     // to resolve it.
     for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
       SUnit *TrySU = Interferences[i];
-      SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+      SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
 
       // Try unscheduling up to the point where it's safe to schedule
       // this node.
@@ -1433,7 +1433,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
   // insert cross class copies.
   // If it's not too expensive, i.e. cost != -1, issue copies.
   SUnit *TrySU = Interferences[0];
-  SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
   assert(LRegs.size() == 1 && "Can't handle this yet!");
   unsigned Reg = LRegs[0];
   SUnit *LRDef = LiveRegDefs[Reg];
@@ -1692,7 +1692,7 @@ public:
   unsigned getNodeOrdering(const SUnit *SU) const {
     if (!SU->getNode()) return 0;
 
-    return scheduleDAG->DAG->GetOrdering(SU->getNode());
+    return SU->getNode()->getIROrder();
   }
 
   bool empty() const { return Queue.empty(); }
@@ -2401,7 +2401,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
   bool RHasPhysReg = right->hasPhysRegDefs;
   if (LHasPhysReg != RHasPhysReg) {
     #ifndef NDEBUG
-    const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"};
+    static const char *const PhysRegMsg[] = { " has no physreg",
+                                              " defines a physreg" };
     #endif
     DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
           << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
@@ -3013,7 +3014,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
   const TargetMachine &TM = IS->TM;
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-  const TargetLowering *TLI = &IS->getTargetLowering();
+  const TargetLowering *TLI = IS->getTargetLowering();
 
   HybridBURRPriorityQueue *PQ =
     new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
@@ -3029,7 +3030,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
   const TargetMachine &TM = IS->TM;
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-  const TargetLowering *TLI = &IS->getTargetLowering();
+  const TargetLowering *TLI = IS->getTargetLowering();
 
   ILPBURRPriorityQueue *PQ =
     new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index b22440d..054e3dd 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -690,21 +690,11 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
 }
 #endif // NDEBUG
 
-namespace {
-  struct OrderSorter {
-    bool operator()(const std::pair<unsigned, MachineInstr*> &A,
-                    const std::pair<unsigned, MachineInstr*> &B) {
-      return A.first < B.first;
-    }
-  };
-}
-
 /// ProcessSDDbgValues - Process SDDbgValues associated with this node.
-static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
-                               InstrEmitter &Emitter,
-                    SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
-                               DenseMap<SDValue, unsigned> &VRBaseMap,
-                               unsigned Order) {
+static void
+ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
+                   SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
+                   DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) {
   if (!N->getHasDebugValue())
     return;
 
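The deleted OrderSorter functor ordered (IR order, MachineInstr*) pairs by their first member only, which is exactly what the llvm::less_first helper used further down provides. A standalone model with a local comparator of the same shape:

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// Stand-in for llvm::less_first: compare pairs by .first alone, just
// as the removed OrderSorter did.
struct LessFirst {
  template <typename T>
  bool operator()(const T &A, const T &B) const { return A.first < B.first; }
};

int main() {
  std::vector<std::pair<unsigned, const char *> > Orders;
  Orders.push_back(std::make_pair(3u, "c"));
  Orders.push_back(std::make_pair(1u, "a"));
  Orders.push_back(std::make_pair(2u, "b"));
  std::sort(Orders.begin(), Orders.end(), LessFirst());
  for (unsigned i = 0; i != Orders.size(); ++i)
    std::printf("%u %s\n", Orders[i].first, Orders[i].second);  // a, b, c
}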
@@ -731,12 +721,12 @@ static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
 // ProcessSourceNode - Process nodes with source order numbers. These are added
 // to a vector which EmitSchedule uses to determine how to insert dbg_value
 // instructions in the right order.
-static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
-                              InstrEmitter &Emitter,
-                              DenseMap<SDValue, unsigned> &VRBaseMap,
-                    SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
-                              SmallSet<unsigned, 8> &Seen) {
-  unsigned Order = DAG->GetOrdering(N);
+static void
+ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
+                  DenseMap<SDValue, unsigned> &VRBaseMap,
+                  SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
+                  SmallSet<unsigned, 8> &Seen) {
+  unsigned Order = N->getIROrder();
   if (!Order || !Seen.insert(Order)) {
     // Process any valid SDDbgValues even if node does not have any order
     // assigned.
@@ -745,7 +735,10 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
   }
 
   MachineBasicBlock *BB = Emitter.getBlock();
-  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
+      // Fast-isel may have inserted some instructions, in which case the
+      // BB->back().isPHI() test will not fire when we want it to.
+      prior(Emitter.getInsertPos())->isPHI()) {
     // Did not insert any instruction.
     Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
     return;
@@ -858,7 +851,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
 
   // Sort the source order instructions and use the order to insert debug
   // values.
-  std::sort(Orders.begin(), Orders.end(), OrderSorter());
+  std::sort(Orders.begin(), Orders.end(), less_first());
 
   SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
   SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
@@ -883,7 +876,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
         // Insert at the instruction, which may be in a different
         // block, if the block was split by a custom inserter.
         MachineBasicBlock::iterator Pos = MI;
-        MI->getParent()->insert(llvm::next(Pos), DbgMI);
+        MI->getParent()->insert(Pos, DbgMI);
       }
     }
   }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 15235c8..45d5a4f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13,7 +13,6 @@
 
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "SDNodeDbgValue.h"
-#include "SDNodeOrdering.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -636,9 +635,6 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
 
   NodeAllocator.Deallocate(AllNodes.remove(N));
 
-  // Remove the ordering of this node.
-  Ordering->remove(N);
-
   // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
   ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N);
   for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
@@ -868,30 +864,30 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
     PointerType::get(Type::getInt8Ty(*getContext()), 0) :
     VT.getTypeForEVT(*getContext());
 
-  return TLI.getDataLayout()->getABITypeAlignment(Ty);
+  return TM.getTargetLowering()->getDataLayout()->getABITypeAlignment(Ty);
 }
 
 // EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), - getVTList(MVT::Other)), - Root(getEntryNode()), Ordering(0), UpdateListeners(0) { + : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), + Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), + UpdateListeners(0) { AllNodes.push_back(&EntryNode); - Ordering = new SDNodeOrdering(); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) { +void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti, + const TargetLowering *tli) { MF = &mf; TTI = tti; + TLI = tli; Context = &mf.getFunction()->getContext(); } SelectionDAG::~SelectionDAG() { assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); allnodes_clear(); - delete Ordering; delete DbgInfo; } @@ -918,29 +914,28 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); - Ordering->clear(); DbgInfo->clear(); } -SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ANY_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::SIGN_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ZERO_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { assert(!VT.isVector() && "getZeroExtendInReg should use the vector element type instead of " "the vector type!"); @@ -954,7 +949,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). /// -SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) { +SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue NegOne = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); @@ -979,16 +974,66 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { EVT EltVT = VT.getScalarType(); const ConstantInt *Elt = &Val; + const TargetLowering *TLI = TM.getTargetLowering(); + // In some cases the vector type is legal but the element type is illegal and // needs to be promoted, for example v8i8 on ARM. In this case, promote the // inserted value (the type does not need to match the vector element type). // Any extra bits introduced will be truncated away. 
- if (VT.isVector() && TLI.getTypeAction(*getContext(), EltVT) == + if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == TargetLowering::TypePromoteInteger) { - EltVT = TLI.getTypeToTransformTo(*getContext(), EltVT); + EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); Elt = ConstantInt::get(*getContext(), NewVal); } + // In other cases the element type is illegal and needs to be expanded, for + // example v2i64 on MIPS32. In this case, find the nearest legal type, split + // the value into n parts and use a vector type with n-times the elements. + // Then bitcast to the type requested. + // Legalizing constants too early makes the DAGCombiner's job harder so we + // only legalize if the DAG tells us we must produce legal types. + else if (NewNodesMustHaveLegalTypes && VT.isVector() && + TLI->getTypeAction(*getContext(), EltVT) == + TargetLowering::TypeExpandInteger) { + APInt NewVal = Elt->getValue(); + EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); + unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; + EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); + + // Check the temporary vector is the correct size. If this fails then + // getTypeToTransformTo() probably returned a type whose size (in bits) + // isn't a power-of-2 factor of the requested type size. + assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); + + SmallVector<SDValue, 2> EltParts; + for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { + EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) + .trunc(ViaEltSizeInBits), + ViaEltVT, isT)); + } + + // EltParts is currently in little endian order. If we actually want + // big-endian order then reverse it now. + if (TLI->isBigEndian()) + std::reverse(EltParts.begin(), EltParts.end()); + + // The elements must be reversed when the element order is different + // to the endianness of the elements (because the BITCAST is itself a + // vector shuffle in this situation). However, we do not need any code to + // perform this reversal because getConstant() is producing a vector + // splat. + // This situation occurs in MIPS MSA. 
+ + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) + Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); + + SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT, + getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT, + &Ops[0], Ops.size())); + return Result; + } assert(Elt->getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"); @@ -1012,13 +1057,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { if (VT.isVector()) { SmallVector<SDValue, 8> Ops; Ops.assign(VT.getVectorNumElements(), Result); - Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size()); } return Result; } SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { - return getConstant(Val, TLI.getPointerTy(), isTarget); + return getConstant(Val, TM.getTargetLowering()->getPointerTy(), isTarget); } @@ -1054,8 +1099,8 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ if (VT.isVector()) { SmallVector<SDValue, 8> Ops; Ops.assign(VT.getVectorNumElements(), Result); - // FIXME DebugLoc info might be appropriate here - Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + // FIXME SDLoc info might be appropriate here + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size()); } return Result; } @@ -1077,15 +1122,16 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { llvm_unreachable("Unsupported type in getConstantFP"); } -SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); + const TargetLowering *TLI = TM.getTargetLowering(); // Truncate (with sign-extension) the offset value to the pointer size. - unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType()); if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); @@ -1112,7 +1158,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT, + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(), + DL.getDebugLoc(), GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -1161,7 +1208,8 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = + TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1188,7 +1236,8 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = + TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1299,13 +1348,10 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { } } -SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, +SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *Mask) { - assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); - assert(VT.isVector() && N1.getValueType().isVector() && - "Vector Shuffle VTs must be a vectors"); - assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() - && "Vector Shuffle VTs must have same element type"); + assert(VT == N1.getValueType() && VT == N2.getValueType() && + "Invalid VECTOR_SHUFFLE"); // Canonicalize shuffle undef, undef -> undef if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) @@ -1354,17 +1400,13 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, commuteShuffle(N1, N2, MaskVec); } - // If Identity shuffle, or all shuffle in to undef, return that node. - bool AllUndef = true; + // If Identity shuffle return that node. bool Identity = true; for (unsigned i = 0; i != NElts; ++i) { if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; - if (MaskVec[i] >= 0) AllUndef = false; } - if (Identity && NElts == N1.getValueType().getVectorNumElements()) + if (Identity && NElts) return N1; - if (AllUndef) - return getUNDEF(VT); FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; @@ -1383,13 +1425,15 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); ShuffleVectorSDNode *N = - new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); + new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), N1, N2, + MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, +SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, ISD::CvtCode Code) { @@ -1406,8 +1450,9 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5, - Code); + CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), + Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1441,7 +1486,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { return SDValue(N, 0); } -SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { +SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1); @@ -1450,7 +1495,8 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label); + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), + dl.getDebugLoc(), Root, Label); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1513,16 +1559,36 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { return SDValue(N, 0); } +/// getAddrSpaceCast - Return an AddrSpaceCastSDNode. 
+SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, + unsigned SrcAS, unsigned DestAS) { + SDValue Ops[] = {Ptr}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), &Ops[0], 1); + ID.AddInteger(SrcAS); + ID.AddInteger(DestAS); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), + dl.getDebugLoc(), + VT, Ptr, SrcAS, DestAS); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - EVT ShTy = TLI.getShiftAmountTy(LHSTy); + EVT ShTy = TM.getTargetLowering()->getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; - return getNode(Opcode, Op.getDebugLoc(), ShTy, Op); + return getNode(Opcode, SDLoc(Op), ShTy, Op); } /// CreateStackTemporary - Create a stack temporary, suitable for holding the @@ -1531,11 +1597,12 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); + const TargetLowering *TLI = TM.getTargetLowering(); unsigned StackAlign = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign); + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI.getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy()); } /// CreateStackTemporary - Create a stack temporary suitable for holding @@ -1545,24 +1612,30 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const DataLayout *TD = TLI.getDataLayout(); + const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout *TD = TLI->getDataLayout(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI.getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy()); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, - SDValue N2, ISD::CondCode Cond, DebugLoc dl) { + SDValue N2, ISD::CondCode Cond, SDLoc dl) { // These setcc operations always fold. switch (Cond) { default: break; case ISD::SETFALSE: case ISD::SETFALSE2: return getConstant(0, VT); case ISD::SETTRUE: - case ISD::SETTRUE2: return getConstant(1, VT); + case ISD::SETTRUE2: { + const TargetLowering *TLI = TM.getTargetLowering(); + TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector()); + return getConstant( + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + } case ISD::SETOEQ: case ISD::SETOGT: @@ -1644,7 +1717,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, } } else { // Ensure that the constant occurs on the RHS. 
- return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); + MVT CompVT = N1.getValueType().getSimpleVT(); + if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT)) + return SDValue(); + + return getSetCC(dl, VT, N2, N1, SwappedCond); } } @@ -1680,6 +1758,7 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// processing. void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { + const TargetLowering *TLI = TM.getTargetLowering(); unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. @@ -1802,7 +1881,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. - if (TLI.getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); return; @@ -1942,7 +2021,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); @@ -2054,7 +2132,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. @@ -2114,7 +2191,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); + TLI->computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); return; } } @@ -2125,6 +2202,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, /// information. For example, immediately after an "SRA X, 2", we know that /// the top 3 bits are all equal to each other, so we return 3. unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ + const TargetLowering *TLI = TM.getTargetLowering(); EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarType().getSizeInBits(); @@ -2149,7 +2227,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ } case ISD::SIGN_EXTEND: - Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + Tmp = + VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; case ISD::SIGN_EXTEND_INREG: @@ -2209,7 +2288,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If setcc returns 0/-1, all bits are sign bits. 
- if (TLI.getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; @@ -2310,7 +2389,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || Op.getOpcode() == ISD::INTRINSIC_VOID) { - unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, Depth); if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); } @@ -2403,14 +2482,15 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { /// getNode - Gets or creates the specified node. /// -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT)); + SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -2420,7 +2500,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue Operand) { // Constant fold unary operations with an integer constant operand. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { @@ -2671,10 +2751,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); } AllNodes.push_back(N); @@ -2789,11 +2871,11 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, return Outputs.back(); // Otherwise build a big vector out of the scalar elements we generated. 
- return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(), + return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs.data(), Outputs.size()); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); @@ -3072,9 +3154,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, if (VT.isSimple() && N1.getValueType().isSimple()) { assert(VT.isVector() && N1.getValueType().isVector() && "Extract subvector VTs must be a vectors!"); - assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && + assert(VT.getVectorElementType() == + N1.getValueType().getVectorElementType() && "Extract subvector VTs must have the same element type!"); - assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() && + assert(VT.getSimpleVT() <= N1.getSimpleValueType() && "Extract subvector must be from larger vector to smaller vector!"); if (isa<ConstantSDNode>(Index.getNode())) { @@ -3085,7 +3168,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, } // Trivial extraction. - if (VT.getSimpleVT() == N1.getValueType().getSimpleVT()) + if (VT.getSimpleVT() == N1.getSimpleValueType()) return N1; } break; @@ -3243,10 +3326,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2); } AllNodes.push_back(N); @@ -3256,11 +3341,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); switch (Opcode) { + case ISD::FMA: { + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); + if (N1CFP && N2CFP && N3CFP) { + APFloat V1 = N1CFP->getValueAPF(); + const APFloat &V2 = N2CFP->getValueAPF(); + const APFloat &V3 = N3CFP->getValueAPF(); + APFloat::opStatus s = + V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); + if (s != APFloat::opInvalidOp) + return getConstantFP(V1, VT); + } + break; + } case ISD::CONCAT_VECTORS: // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to // one big BUILD_VECTOR. 
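[Editorial sketch] The ISD::FMA case added in the hunk above folds a fused multiply-add of three floating-point constants at build time. A minimal standalone sketch of the same fold, assuming the APFloat API as used in the patch (the helper name tryFoldConstantFMA is illustrative only, not part of the patch):

#include "llvm/ADT/APFloat.h"
using llvm::APFloat;

// Evaluate fma(A, B, C) with round-to-nearest-ties-to-even, mirroring the
// fold above. fusedMultiplyAdd computes A = A * B + C in place and returns
// a status; opInvalidOp (e.g. inf * 0 + NaN) means the node is left alone.
static bool tryFoldConstantFMA(APFloat A, const APFloat &B, const APFloat &C,
                               APFloat &Result) {
  APFloat::opStatus S = A.fusedMultiplyAdd(B, C, APFloat::rmNearestTiesToEven);
  if (S == APFloat::opInvalidOp)
    return false; // do not fold; keep the FMA node as-is
  Result = A;
  return true;
}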
@@ -3300,7 +3400,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, "Insert subvector VTs must be a vectors"); assert(VT == N1.getValueType() && "Dest and insert subvector source types must match!"); - assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() && + assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && "Insert subvector must be from smaller vector to larger vector!"); if (isa<ConstantSDNode>(Index.getNode())) { assert((N2.getValueType().getVectorNumElements() + @@ -3310,7 +3410,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } // Trivial insertion. - if (VT.getSimpleVT() == N2.getValueType().getSimpleVT()) + if (VT.getSimpleVT() == N2.getSimpleValueType()) return N2; } break; @@ -3333,10 +3433,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); } AllNodes.push_back(N); @@ -3346,14 +3448,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VT, Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; @@ -3379,14 +3481,14 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { ArgChains.push_back(SDValue(L, 1)); // Build a tokenfactor for all the chains. - return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, &ArgChains[0], ArgChains.size()); } /// getMemsetValue - Vectorized representation of the memset value /// operand. static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, - DebugLoc dl) { + SDLoc dl) { assert(Value.getOpcode() != ISD::UNDEF); unsigned NumBits = VT.getScalarType().getSizeInBits(); @@ -3412,7 +3514,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// getMemsetStringVal - Similar to getMemsetValue. Except this is only /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. -static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, +static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, const TargetLowering &TLI, StringRef Str) { // Handle vector with all elements zero. 
if (Str.empty()) { @@ -3454,10 +3556,10 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, /// getMemBasePlusOffset - Returns base and offset node for the /// -static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, +static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, SelectionDAG &DAG) { EVT VT = Base.getValueType(); - return DAG.getNode(ISD::ADD, Base.getDebugLoc(), + return DAG.getNode(ISD::ADD, dl, VT, Base, DAG.getConstant(Offset, VT)); } @@ -3585,7 +3687,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, return true; } -static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, +static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, bool isVol, @@ -3630,7 +3732,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack - // realignment. + // realignment. const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && @@ -3671,7 +3773,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); if (Value.getNode()) Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), + getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); } @@ -3685,11 +3787,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, - getMemBasePlusOffset(Src, SrcOff, DAG), + getMemBasePlusOffset(Src, SrcOff, dl, DAG), SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, MinAlign(SrcAlign, SrcOff)); Store = DAG.getTruncStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), + getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), VT, isVol, false, Align); } @@ -3703,7 +3805,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, &OutChains[0], OutChains.size()); } -static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, +static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, bool isVol, @@ -3755,10 +3857,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value, Store; + SDValue Value; Value = DAG.getLoad(VT, dl, Chain, - getMemBasePlusOffset(Src, SrcOff, DAG), + getMemBasePlusOffset(Src, SrcOff, dl, DAG), SrcPtrInfo.getWithOffset(SrcOff), isVol, false, false, SrcAlign); LoadValues.push_back(Value); @@ -3771,10 +3873,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value, Store; + SDValue Store; Store = DAG.getStore(Chain, dl, LoadValues[i], - getMemBasePlusOffset(Dst, DstOff, DAG), + getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); OutChains.push_back(Store); DstOff += VTSize; @@ -3784,7 +3886,25 @@ static SDValue 
getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
                     &OutChains[0], OutChains.size());
 }
 
-static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+/// \brief Lower a call to the 'memset' intrinsic into a series of store
+/// operations.
+///
+/// \param DAG Selection DAG where the lowered code is placed.
+/// \param dl Link to the corresponding IR location.
+/// \param Chain Control flow dependency.
+/// \param Dst Pointer to the destination memory location.
+/// \param Src Value of the byte to write into memory.
+/// \param Size Number of bytes to write.
+/// \param Align Alignment of the destination in bytes.
+/// \param isVol True if the destination is volatile.
+/// \param DstPtrInfo IR information on the memory pointer.
+/// \returns The new head of the control flow if lowering was successful, an
+/// empty SDValue otherwise.
+///
+/// The function tries to replace the 'llvm.memset' intrinsic with several
+/// store operations and value-calculation code. This is usually profitable
+/// for small memory sizes.
+static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
                                SDValue Chain, SDValue Dst,
                                SDValue Src, uint64_t Size,
                                unsigned Align, bool isVol,
@@ -3856,7 +3976,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
     }
     assert(Value.getValueType() == VT && "Value with wrong type.");
     SDValue Store = DAG.getStore(Chain, dl, Value,
-                                 getMemBasePlusOffset(Dst, DstOff, DAG),
+                                 getMemBasePlusOffset(Dst, DstOff, dl, DAG),
                                  DstPtrInfo.getWithOffset(DstOff),
                                  isVol, false, Align);
     OutChains.push_back(Store);
@@ -3868,7 +3988,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
                      &OutChains[0], OutChains.size());
 }
 
-SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
+SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
                                 SDValue Src, SDValue Size,
                                 unsigned Align, bool isVol, bool AlwaysInline,
                                 MachinePointerInfo DstPtrInfo,
@@ -3914,29 +4034,31 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
   // beyond the given memory regions. But fixing this isn't easy, and most
   // people don't care.
 
+  const TargetLowering *TLI = TM.getTargetLowering();
+
   // Emit a library call.
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); - // FIXME: pass in DebugLoc + // FIXME: pass in SDLoc TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMCPY), + TLI->getLibcallCallingConv(RTLIB::MEMCPY), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } -SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, +SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo, @@ -3970,29 +4092,31 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. + const TargetLowering *TLI = TM.getTargetLowering(); + // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); - // FIXME: pass in DebugLoc + // FIXME: pass in SDLoc TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMMOVE), + TLI->getLibcallCallingConv(RTLIB::MEMMOVE), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } -SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, +SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { @@ -4023,7 +4147,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, return Result; // Emit a library call. 
- Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext()); + const TargetLowering *TLI = TM.getTargetLowering(); + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -4041,22 +4166,53 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, Entry.Ty = IntPtrTy; Entry.isSExt = false; Args.push_back(Entry); - // FIXME: pass in DebugLoc + // FIXME: pass in SDLoc TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMSET), + TLI->getLibcallCallingConv(RTLIB::MEMSET), /*isTailCall=*/false, /*doesNotReturn*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTList, SDValue* Ops, unsigned NumOps, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + // Allocate the operands array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the allocator is released. + // If the number of operands is less than 5 we use AtomicSDNode's internal + // storage. + SDUse *DynOps = NumOps > 4 ? 
OperandAllocator.Allocate<SDUse>(NumOps) : 0; + + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, MemVT, + Ops, DynOps, NumOps, MMO, + Ordering, SynchScope); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, @@ -4084,7 +4240,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachineMemOperand *MMO, @@ -4096,25 +4252,11 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, EVT VT = Cmp.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 4); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, - Ptr, Cmp, Swp, MMO, Ordering, - SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value* PtrVal, @@ -4145,7 +4287,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO, @@ -4169,25 +4311,11 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? 
getVTList(MVT::Other) : getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 3); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, - Ptr, Val, MMO, - Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, const Value* PtrVal, @@ -4218,7 +4346,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO, @@ -4227,26 +4355,13 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 2); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, - Ptr, MMO, Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope); } /// getMergeValues - Create a MERGE_VALUES node from the given operands. 
SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, - DebugLoc dl) { + SDLoc dl) { if (NumOps == 1) return Ops[0]; @@ -4259,7 +4374,7 @@ SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, } SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps, EVT MemVT, MachinePointerInfo PtrInfo, @@ -4271,7 +4386,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, } SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, @@ -4294,7 +4409,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, } SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, EVT MemVT, MachineMemOperand *MMO) { assert((Opcode == ISD::INTRINSIC_VOID || @@ -4318,12 +4433,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, return SDValue(E, 0); } - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, - MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + NumOps, MemVT, MMO); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, - MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + NumOps, MemVT, MMO); } AllNodes.push_back(N); return SDValue(N, 0); @@ -4365,7 +4482,7 @@ static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, DebugLoc dl, SDValue Chain, + EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, @@ -4398,7 +4515,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, DebugLoc dl, SDValue Chain, + EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO) { if (VT == MemVT) { @@ -4437,14 +4554,15 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType, + SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, +SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, @@ -4457,7 +4575,15 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, TBAAInfo, Ranges); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, +SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, + SDValue Chain, SDValue Ptr, + MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return 
getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + VT, MMO); +} + +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, @@ -4469,8 +4595,16 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, } +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, + SDValue Chain, SDValue Ptr, EVT MemVT, + MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, + MemVT, MMO); +} + SDValue -SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, +SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM) { LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); assert(LD->getOffset().getOpcode() == ISD::UNDEF && @@ -4481,7 +4615,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, false, LD->getAlignment()); } -SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { @@ -4508,7 +4642,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, return getStore(Chain, dl, Val, Ptr, MMO); } -SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); @@ -4527,14 +4661,15 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, - false, VT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, @@ -4561,7 +4696,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } -SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, EVT SVT, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); @@ -4595,15 +4730,16 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, - true, SVT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } SDValue -SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, +SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM) { StoreSDNode *ST = cast<StoreSDNode>(OrigStore); 
assert(ST->getOffset().getOpcode() == ISD::UNDEF && @@ -4619,7 +4755,8 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM, + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), ST->getMemOperand()); @@ -4628,7 +4765,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, return SDValue(N, 0); } -SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, +SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue SV, unsigned Align) { @@ -4636,7 +4773,7 @@ SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, const SDUse *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -4652,7 +4789,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return getNode(Opcode, DL, VT, &NewOps[0], NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, const SDValue *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -4694,10 +4831,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops, NumOps); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops, NumOps); } AllNodes.push_back(N); @@ -4707,14 +4846,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, ArrayRef<EVT> ResultTys, const SDValue *Ops, unsigned NumOps) { return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), Ops, NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps) { if (NumVTs == 1) @@ -4722,7 +4861,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, const SDValue *Ops, unsigned NumOps) { if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); @@ -4760,26 +4899,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return SDValue(E, 0); if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, 
Ops[0], + Ops[1]); } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], - Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); } else { - N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops, NumOps); } CSEMap.InsertNode(N, IP); } else { if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1]); } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], - Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); } else { - N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops, NumOps); } } AllNodes.push_back(N); @@ -4789,36 +4938,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) { +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { return getNode(Opcode, DL, VTList, 0, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1) { SDValue Ops[] = { N1 }; return getNode(Opcode, DL, VTList, Ops, 1); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2) { SDValue Ops[] = { N1, N2 }; return getNode(Opcode, DL, VTList, Ops, 2); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3) { SDValue Ops[] = { N1, N2, N3 }; return getNode(Opcode, DL, VTList, Ops, 3); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VTList, Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; @@ -4830,76 +4979,81 @@ SDVTList SelectionDAG::getVTList(EVT VT) { } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(2); - Array[0] = VT1; - Array[1] = VT2; - SDVTList Result = makeVTList(Array, 2); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(2U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = 
VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(2); + Array[0] = VT1; + Array[1] = VT2; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && - I->VTs[2] == VT3) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(3); - Array[0] = VT1; - Array[1] = VT2; - Array[2] = VT3; - SDVTList Result = makeVTList(Array, 3); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(3U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && - I->VTs[2] == VT3 && I->VTs[3] == VT4) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(4); - Array[0] = VT1; - Array[1] = VT2; - Array[2] = VT3; - Array[3] = VT4; - SDVTList Result = makeVTList(Array, 4); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(4U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + ID.AddInteger(VT4.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(4); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Array[3] = VT4; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { - switch (NumVTs) { - case 0: llvm_unreachable("Cannot have nodes without results!"); - case 1: return getVTList(VTs[0]); - case 2: return getVTList(VTs[0], VTs[1]); - case 3: return getVTList(VTs[0], VTs[1], VTs[2]); - case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]); - default: break; + FoldingSetNodeID ID; + ID.AddInteger(NumVTs); + for (unsigned index = 0; index < NumVTs; index++) { + ID.AddInteger(VTs[index].getRawBits()); } - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) { - if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1]) - continue; - - if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2])) - return *I; + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(NumVTs); + std::copy(VTs, VTs + NumVTs, Array); + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); + VTListMap.InsertNode(Result, IP); } - - EVT *Array = Allocator.Allocate<EVT>(NumVTs); - std::copy(VTs, VTs+NumVTs, Array); - SDVTList Result = makeVTList(Array, NumVTs); - VTList.push_back(Result); - return Result; + return 
Result->getSDVTList(); } @@ -5138,17 +5292,21 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, return N; } -/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away /// the line number information on the merged node since it is not possible to /// preserve the information that operation is associated with multiple lines. /// This will make the debugger working better at -O0, were there is a higher /// probability having other instructions associated with that line. /// -SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) { +/// For IROrder, we keep the smaller of the two +SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { DebugLoc NLoc = N->getDebugLoc(); - if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) { + if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && + (OLoc.getDebugLoc() != NLoc)) { N->setDebugLoc(DebugLoc()); } + unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); + N->setIROrder(Order); return N; } @@ -5157,7 +5315,7 @@ SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) { /// /// Note that MorphNodeTo returns the resultant node. If there is already a /// node of the specified opcode and operands, it returns that node instead of -/// the current one. Note that the DebugLoc need not be the same. +/// the current one. Note that the SDLoc need not be the same. /// /// Using MorphNodeTo is faster than creating a new node and swapping it in /// with ReplaceAllUsesWith both because it often avoids allocating a new @@ -5173,7 +5331,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) - return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc()); + return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); } if (!RemoveNodeFromCSEMaps(N)) @@ -5250,20 +5408,20 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, /// node of the specified opcode and operands, it returns that node instead of /// the current one. 
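The getVTList overloads above replace a reverse linear scan over every previously created VT list with a FoldingSet keyed by the list length and each type's raw bits. A minimal standalone sketch of that find-or-insert pattern follows; it uses std::unordered_map and a toy hash in place of LLVM's FoldingSetNodeID/FoldingSet, so the names and key mixing are illustrative assumptions, not the real API.

#include <cstdint>
#include <unordered_map>
#include <vector>

// Toy uniquing table: equal type lists share one canonical stored copy.
struct VTListCache {
  std::unordered_map<uint64_t, std::vector<unsigned>> Map;

  // Key construction mirrors the code above: length first, then each raw
  // type bit-pattern (FoldingSetNodeID::AddInteger in the real thing).
  static uint64_t key(const std::vector<unsigned> &VTs) {
    uint64_t H = VTs.size();
    for (unsigned Raw : VTs)
      H = H * 1000003ULL + Raw;   // simple multiplicative mix, sketch only
    return H;
  }

  // Find-or-insert: materialize the canonical copy once on a miss, reuse it
  // on a hit. A real FoldingSet also compares full profiles on collision.
  const std::vector<unsigned> &get(const std::vector<unsigned> &VTs) {
    uint64_t K = key(VTs);
    auto It = Map.find(K);
    if (It == Map.end())
      It = Map.emplace(K, VTs).first;
    return It->second;
  }
};

This turns each getVTList query from a walk over every distinct list ever created into an expected O(list length) hash lookup, which matters because every getNode call asks for a VT list.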
MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT) { SDVTList VTs = getVTList(VT); return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; @@ -5271,7 +5429,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; @@ -5279,20 +5437,20 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT); return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; @@ -5300,7 +5458,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; @@ -5308,7 +5466,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); @@ -5317,7 +5475,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2); @@ -5325,7 +5483,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -5334,7 +5492,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -5343,7 +5501,7 @@ SelectionDAG::getMachineNode(unsigned 
Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -5351,7 +5509,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, EVT VT4, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); @@ -5359,7 +5517,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); @@ -5367,7 +5525,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, ArrayRef<SDValue> OpsArray) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; @@ -5380,12 +5538,13 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL)); + return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); } } // Allocate a new MachineSDNode. - N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs); + N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs); // Initialize the operands list. if (NumOps > array_lengthof(N->LocalOperands)) @@ -5411,7 +5570,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, /// getTargetExtractSubreg - A convenience function for creating /// TargetOpcode::EXTRACT_SUBREG nodes. SDValue -SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, +SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand) { SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, @@ -5422,7 +5581,7 @@ SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, /// getTargetInsertSubreg - A convenience function for creating /// TargetOpcode::INSERT_SUBREG nodes. SDValue -SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, +SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand, SDValue Subreg) { SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, @@ -5845,18 +6004,6 @@ unsigned SelectionDAG::AssignTopologicalOrder() { return DAGSize; } -/// AssignOrdering - Assign an order to the SDNode. -void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) { - assert(SD && "Trying to assign an order to a null node!"); - Ordering->add(SD, Order); -} - -/// GetOrdering - Get the order for the SDNode. -unsigned SelectionDAG::GetOrdering(const SDNode *SD) const { - assert(SD && "Trying to get the order of a null node!"); - return Ordering->getOrder(SD); -} - /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD. 
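When CSE folds a newly requested node into an existing one, UpdadeSDLocOnMergedSDNode (defined above) has to reconcile two locations: at -O0 a node that now stands for several source lines gets no DebugLoc at all, and the IR order becomes the minimum of the two. A simplified sketch of that policy, using stand-in types rather than the real SDNode/SDLoc:

#include <algorithm>

// Stand-ins for illustration only.
struct LocLite { int Line = 0; bool known() const { return Line != 0; } };
inline bool operator!=(LocLite A, LocLite B) { return A.Line != B.Line; }
struct NodeLite { LocLite DL; unsigned IROrder = 0; };

// Merge policy: erasing a disagreeing location at -O0 beats keeping an
// arbitrary one, since the debugger then skips the node instead of jumping
// to a misleading line; the smaller IR order keeps the merged node treated
// as the earlier occurrence when instructions are ordered later.
NodeLite *mergeSDLoc(NodeLite *N, LocLite OLoc, unsigned OOrder, bool OptNone) {
  if (N->DL.known() && OptNone && OLoc != N->DL)
    N->DL = LocLite();
  N->IROrder = std::min(N->IROrder, OOrder);
  return N;
}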
void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { @@ -5883,7 +6030,7 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { ClonedDVs.push_back(Clone); } } - for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(), + for (SmallVectorImpl<SDDbgValue *>::iterator I = ClonedDVs.begin(), E = ClonedDVs.end(); I != E; ++I) AddDbgValue(*I, ToNode, false); } @@ -5896,16 +6043,22 @@ HandleSDNode::~HandleSDNode() { DropOperands(); } -GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL, - const GlobalValue *GA, +GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, + DebugLoc DL, const GlobalValue *GA, EVT VT, int64_t o, unsigned char TF) - : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { + : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } -MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, - MachineMemOperand *mmo) - : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { +AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT, + SDValue X, unsigned SrcAS, + unsigned DestAS) + : UnarySDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT), X), + SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {} + +MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, + EVT memvt, MachineMemOperand *mmo) + : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); @@ -5914,10 +6067,10 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } -MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, +MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, const SDValue *Ops, unsigned NumOps, EVT memvt, MachineMemOperand *mmo) - : SDNode(Opc, dl, VTs, Ops, NumOps), + : SDNode(Opc, Order, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); @@ -6064,9 +6217,10 @@ bool SDNode::hasPredecessor(const SDNode *N) const { return hasPredecessorHelper(N, Visited, Worklist); } -bool SDNode::hasPredecessorHelper(const SDNode *N, - SmallPtrSet<const SDNode *, 32> &Visited, - SmallVector<const SDNode *, 16> &Worklist) const { +bool +SDNode::hasPredecessorHelper(const SDNode *N, + SmallPtrSet<const SDNode *, 32> &Visited, + SmallVectorImpl<const SDNode *> &Worklist) const { if (Visited.empty()) { Worklist.push_back(this); } else { @@ -6103,7 +6257,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT VT = N->getValueType(0); unsigned NE = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SmallVector<SDValue, 8> Scalars; SmallVector<SDValue, 4> Operands(N->getNumOperands()); @@ -6121,11 +6275,12 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { // A vector operand; extract a single element. 
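A recurring pattern in the constructor hunks above: every SDNode now receives an IROrder together with its DebugLoc, which is what allows this change to delete the AssignOrdering/GetOrdering side table. A rough sketch of the SDLoc idea with simplified stand-in types (not the LLVM classes):

// The wrapper carries both pieces so call sites pass one argument and
// constructors unpack it, as in
//   new (NodeAllocator) SDNode(Opc, DL.getIROrder(), DL.getDebugLoc(), ...).
struct DebugLocLite { unsigned Line = 0; };

class SDLocLite {
  DebugLocLite DL;
  unsigned IROrder;
public:
  SDLocLite(DebugLocLite L, unsigned Order) : DL(L), IROrder(Order) {}
  unsigned getIROrder() const { return IROrder; }
  DebugLocLite getDebugLoc() const { return DL; }
};

struct SDNodeLite {
  unsigned IROrder;        // stored inline instead of in a side map
  DebugLocLite DL;
  explicit SDNodeLite(SDLocLite Loc)
      : IROrder(Loc.getIROrder()), DL(Loc.getDebugLoc()) {}
};

Storing the order inline trades one word per node for dropping a map lookup on every query, and it cannot go stale when nodes are CSE'd or morphed.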
+ const TargetLowering *TLI = TM.getTargetLowering(); EVT OperandEltVT = OperandVT.getVectorElementType(); Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, TLI.getPointerTy())); + getConstant(i, TLI->getVectorIdxTy())); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -6147,8 +6302,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { case ISD::ROTL: case ISD::ROTR: Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], - getShiftAmountOperand(Operands[0].getValueType(), - Operands[1]))); + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); break; case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: { @@ -6203,8 +6358,9 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, const GlobalValue *GV2 = NULL; int64_t Offset1 = 0; int64_t Offset2 = 0; - bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); - bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + const TargetLowering *TLI = TM.getTargetLowering(); + bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) return Offset1 == (Offset2 + Dist*Bytes); return false; @@ -6217,11 +6373,12 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. const GlobalValue *GV; int64_t GVOffset = 0; - if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { + unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI.getDataLayout()); + TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) @@ -6251,6 +6408,38 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { return 0; } +/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type +/// which is split (or expanded) into two not necessarily identical pieces. +std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { + // Currently all types are split in half. + EVT LoVT, HiVT; + if (!VT.isVector()) { + LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); + } else { + unsigned NumElements = VT.getVectorNumElements(); + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), + NumElements/2); + } + return std::make_pair(LoVT, HiVT); +} + +/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the +/// low/high part. 
+std::pair<SDValue, SDValue> +SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, + const EVT &HiVT) { + assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <= + N.getValueType().getVectorNumElements() && + "More vector elements requested than available!"); + SDValue Lo, Hi; + Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, + getConstant(0, TLI->getVectorIdxTy())); + Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, + getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy())); + return std::make_pair(Lo, Hi); +} + // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); @@ -6372,7 +6561,7 @@ static void checkForCyclesHelper(const SDNode *N, void llvm::checkForCycles(const llvm::SDNode *N) { #ifdef XDEBUG - assert(N && "Checking nonexistant SDNode"); + assert(N && "Checking nonexistent SDNode"); SmallPtrSet<const SDNode*, 32> visited; SmallPtrSet<const SDNode*, 32> checked; checkForCyclesHelper(N, visited, checked); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 194aba8..2b2713d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,6 +15,7 @@ #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -32,6 +33,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -48,7 +50,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/IntegersSubsetMapping.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" @@ -57,6 +58,7 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include <algorithm> using namespace llvm; @@ -87,7 +89,7 @@ LimitFPPrecision("limit-float-precision", // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; -static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V); @@ -96,7 +98,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, /// larger then ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). 
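GetSplitDestVTs and SplitVector above implement type legalization's basic split: an even-length vector becomes a low half and a high half, extracted at subvector indices 0 and LoVT.getVectorNumElements(). The same computation on plain containers, as a self-contained sketch:

#include <cassert>
#include <utility>
#include <vector>

// Split an even-length vector in half; the high half starts where the low
// half ends, matching the two EXTRACT_SUBVECTOR indices above.
template <typename T>
std::pair<std::vector<T>, std::vector<T>>
splitVector(const std::vector<T> &V) {
  assert(V.size() % 2 == 0 && "Splitting vector, but not in half!");
  const size_t Half = V.size() / 2;
  return {std::vector<T>(V.begin(), V.begin() + Half),
          std::vector<T>(V.begin() + Half, V.end())};
}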
-static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, +static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, @@ -217,7 +219,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, /// type larger then ValueVT then AssertOp can be used to specify whether the /// extra bits are known to be zero (ISD::AssertZext) or sign extended from /// ValueVT (ISD::AssertSext). -static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); @@ -280,7 +282,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); } // Vector/Vector bitcast. @@ -327,14 +329,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } -static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, +static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. -static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, +static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { @@ -466,7 +468,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, /// getCopyToPartsVector - Create a series of nodes that contain the specified /// value split into legal parts. -static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, +static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); @@ -489,7 +491,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, SmallVector<SDValue, 16> Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getIntPtrConstant(i))); + ElementVT, Val, DAG.getConstant(i, + TLI.getVectorIdxTy()))); for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) @@ -515,7 +518,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getIntPtrConstant(0)); + PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? 
ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -545,10 +548,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); + DAG.getConstant(i * (NumElements / NumIntermediates), + TLI.getVectorIdxTy())); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - IntermediateVT, Val, DAG.getIntPtrConstant(i)); + IntermediateVT, Val, + DAG.getConstant(i, TLI.getVectorIdxTy())); } // Split the intermediate operands into legal parts. @@ -644,7 +649,7 @@ namespace { /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - DebugLoc dl, + SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V = 0) const; @@ -652,7 +657,7 @@ namespace { /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. - void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node @@ -671,7 +676,7 @@ namespace { /// If the Flag pointer is NULL, no flag is used. SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - DebugLoc dl, + SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. @@ -717,6 +722,14 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, unsigned NumSignBits = LOI->NumSignBits; unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + if (NumZeroBits == RegSize) { + // The current value is a zero. + // Explicitly express that as it would be easier for + // optimizations to kick in. + Parts[i] = DAG.getConstant(0, RegisterVT); + continue; + } + // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. bool isSExt = true; @@ -761,7 +774,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. -void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -869,7 +882,7 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - CurDebugLoc = DebugLoc(); + CurInst = NULL; HasTailCall = false; } @@ -900,7 +913,7 @@ SDValue SelectionDAGBuilder::getRoot() { } // Otherwise, we have to make a token factor node. 
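One addition worth calling out from the getCopyFromRegs hunk above: when live-out analysis proves every bit of an incoming register is zero, the builder now emits a plain constant 0 instead of a register copy wrapped in assert nodes, which later combines can fold far more readily. A scalar sketch of the test, with a plain bitmask standing in for the APInt known-zero set:

#include <cstdint>

// Returns true (and a literal zero) when the known-zero mask covers the
// whole register, i.e. the value is provably zero.
bool foldToZero(uint64_t KnownZeroMask, unsigned RegSizeBits, uint64_t &Out) {
  const uint64_t Full =
      RegSizeBits >= 64 ? ~0ULL : ((1ULL << RegSizeBits) - 1);
  if (KnownZeroMask == Full) {
    Out = 0;   // corresponds to Parts[i] = DAG.getConstant(0, RegisterVT)
    return true;
  }
  return false;
}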
- SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &PendingLoads[0], PendingLoads.size()); PendingLoads.clear(); DAG.setRoot(Root); @@ -930,7 +943,7 @@ SDValue SelectionDAGBuilder::getControlRoot() { PendingExports.push_back(Root); } - Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &PendingExports[0], PendingExports.size()); PendingExports.clear(); @@ -938,27 +951,21 @@ SDValue SelectionDAGBuilder::getControlRoot() { return Root; } -void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { - if (DAG.GetOrdering(Node) != 0) return; // Already has ordering. - DAG.AssignOrdering(Node, SDNodeOrder); - - for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) - AssignOrderingToNode(Node->getOperand(I).getNode()); -} - void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. if (isa<TerminatorInst>(&I)) HandlePHINodesInSuccessorBlocks(I.getParent()); - CurDebugLoc = I.getDebugLoc(); + ++SDNodeOrder; + + CurInst = &I; visit(I.getOpcode(), I); if (!isa<TerminatorInst>(&I) && !HasTailCall) CopyToExportRegsIfNeeded(&I); - CurDebugLoc = DebugLoc(); + CurInst = NULL; } void SelectionDAGBuilder::visitPHI(const PHINode &) { @@ -975,12 +982,6 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; #include "llvm/IR/Instruction.def" } - - // Assign the ordering to the freshly created DAG nodes. - if (NodeMap.count(&I)) { - ++SDNodeOrder; - AssignOrderingToNode(getValue(&I).getNode()); - } } // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, @@ -1002,7 +1003,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DAG.AddDbgValue(SDV, Val.getNode(), false); } } else - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -1020,9 +1021,10 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), + InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); resolveDanglingDebugInfo(V, N); return N; } @@ -1051,17 +1053,21 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { /// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. 
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { + const TargetLowering *TLI = TM.getTargetLowering(); + if (const Constant *C = dyn_cast<Constant>(V)) { - EVT VT = TLI.getValueType(V->getType(), true); + EVT VT = TLI->getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) return DAG.getConstant(*CI, VT); if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) - return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT); + return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); - if (isa<ConstantPointerNull>(C)) - return DAG.getConstant(0, TLI.getPointerTy()); + if (isa<ConstantPointerNull>(C)) { + unsigned AS = V->getType()->getPointerAddressSpace(); + return DAG.getConstant(0, TLI->getPointerTy(AS)); + } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return DAG.getConstantFP(*CFP, VT); @@ -1090,9 +1096,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } return DAG.getMergeValues(&Constants[0], Constants.size(), - getCurDebugLoc()); + getCurSDLoc()); } - + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) { SmallVector<SDValue, 4> Ops; @@ -1105,8 +1111,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } if (isa<ArrayType>(CDS->getType())) - return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + return DAG.getMergeValues(&Ops[0], Ops.size(), getCurSDLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, &Ops[0], Ops.size()); } @@ -1115,7 +1121,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { "Unknown struct or array constant!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, C->getType(), ValueVTs); + ComputeValueVTs(*TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct @@ -1131,7 +1137,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } return DAG.getMergeValues(&Constants[0], NumElts, - getCurDebugLoc()); + getCurSDLoc()); } if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) @@ -1148,7 +1154,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); - EVT EltVT = TLI.getValueType(VecTy->getElementType()); + EVT EltVT = TLI->getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -1159,7 +1165,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } // Create a BUILD_VECTOR node. - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, &Ops[0], Ops.size()); } @@ -1169,21 +1175,22 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + return DAG.getFrameIndex(SI->second, TLI->getPointerTy()); } // If this is an instruction which fast-isel has deferred, select it now. 
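In getValueImpl above, ConstantPointerNull is no longer lowered with the default getPointerTy(): pointer width can differ per address space, so the zero constant is created in the pointer type of the value's own address space. A small sketch of the lookup, with a hypothetical width table standing in for the target's data layout:

#include <map>

// Hypothetical per-address-space pointer widths, illustration only.
struct TargetLite {
  std::map<unsigned, unsigned> PtrBits{{0, 64}, {1, 32}};
  unsigned pointerBits(unsigned AS) const {
    auto It = PtrBits.find(AS);
    return It != PtrBits.end() ? It->second : PtrBits.at(0);
  }
};

// A null pointer is a zero of *that* space's pointer type, the analogue of
// DAG.getConstant(0, TLI->getPointerTy(AS)).
unsigned nullPointerWidth(const TargetLite &T, unsigned AddrSpace) {
  return T.pointerBits(AddrSpace);
}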
if (const Instruction *Inst = dyn_cast<Instruction>(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); - RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); } llvm_unreachable("Can't get register for value!"); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + const TargetLowering *TLI = TM.getTargetLowering(); SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; SmallVector<SDValue, 8> OutVals; @@ -1196,7 +1203,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); @@ -1204,26 +1211,26 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), + SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i])); Chains[i] = - DAG.getStore(Chain, getCurDebugLoc(), + DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), // FIXME: better loc info would be nice. 
Add, MachinePointerInfo(), false, false, 0); } - Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], NumValues); } else if (I.getNumOperands() != 0) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); @@ -1241,12 +1248,12 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ExtendKind = ISD::ZERO_EXTEND; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); + VT = TLI->getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); - MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT); + MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); - getCopyToParts(DAG, getCurDebugLoc(), + getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), &Parts[0], NumParts, PartVT, &I, ExtendKind); @@ -1264,7 +1271,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), - /*isfixed=*/true, 0, 0)); + VT, /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } } @@ -1274,8 +1281,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); - Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, - Outs, OutVals, getCurDebugLoc(), DAG); + Chain = TM.getTargetLowering()->LowerReturn(Chain, CallConv, isVarArg, + Outs, OutVals, getCurSDLoc(), + DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1474,7 +1482,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, /// If we should emit this as a bunch of and/or'd together conditions, return /// false. bool -SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ +SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) { if (Cases.size() != 2) return true; // If this is two comparisons of the same values or'd or and'd together, they @@ -1519,7 +1527,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // If this is not a fall-through branch, emit the branch. if (Succ0MBB != NextBlock) - DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1548,7 +1556,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - if (!TLI.isJumpExpensive() && + if (!TM.getTargetLowering()->isJumpExpensive() && BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { @@ -1596,7 +1604,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); // Build the setcc now. 
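The range case emitted just below tests Low <= x <= High with a single subtraction and one unsigned comparison: after x - Low, any value outside the range wraps above High - Low. A one-function sketch of the trick:

#include <cstdint>

// Membership test for [Low, High] with one compare: wrap-around pushes
// out-of-range values past High - Low in the unsigned domain.
bool inCaseRange(int64_t X, int64_t Low, int64_t High) {
  return (uint64_t)X - (uint64_t)Low <= (uint64_t)High - (uint64_t)Low;
}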
if (CB.CmpMHS == NULL) { @@ -1612,18 +1620,17 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); } else { - assert(CB.CC == ISD::SETCC_INVALID && - "Condition is undefined for to-the-range belonging check."); + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); - - if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) { + + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), - ISD::SETULE); + ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, VT, CmpOp, DAG.getConstant(Low, VT)); @@ -1671,11 +1678,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = TLI.getPointerTy(); - SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + EVT PTy = TM.getTargetLowering()->getPointerTy(); + SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); - SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(), + SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(), MVT::Other, Index.getValue(1), Table, Index); DAG.setRoot(BrJumpTable); @@ -1691,7 +1698,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // difference between smallest and largest cases. SDValue SwitchOp = getValue(JTH.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, DAG.getConstant(JTH.First, VT)); // The SDNode we just created, which holds the value being switched on minus @@ -1699,19 +1706,22 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); + const TargetLowering *TLI = TM.getTargetLowering(); + SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy()); - unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the largest // case in the switch. - SDValue CMP = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(Sub.getValueType()), Sub, - DAG.getConstant(JTH.Last-JTH.First,VT), + SDValue CMP = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, + DAG.getConstant(JTH.Last - JTH.First,VT), ISD::SETUGT); // Set NextBlock to be the MBB immediately after the current one, if any. 
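visitJumpTableHeader above lowers table dispatch in three steps: bias the switch value by the smallest case so it indexes from zero, zero-extend or truncate it to the pointer type, then route anything past the table span to the default block with one unsigned compare. The equivalent control flow in plain C++, as a sketch:

#include <cstdint>
#include <vector>

// Table contains one successor id per value in [First, Last].
int jumpTableDispatch(uint64_t X, uint64_t First, uint64_t Last,
                      const std::vector<int> &Table, int DefaultBB) {
  uint64_t Index = X - First;        // Sub = SwitchOp - JTH.First
  if (Index > Last - First)          // SETUGT guard branches to JT.Default
    return DefaultBB;
  return Table[Index];               // BR_JT indexes the jump table
}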
@@ -1722,17 +1732,88 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); if (JT.MBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, + BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); DAG.setRoot(BrCond); } +/// Codegen a new tail for a stack protector check ParentMBB which has had its +/// tail spliced into a stack protector check success bb. +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. +void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB) { + + // First create the loads to the guard/stack slot for the comparison. + const TargetLowering *TLI = TM.getTargetLowering(); + EVT PtrTy = TLI->getPointerTy(); + + MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); + int FI = MFI->getStackProtectorIndex(); + + const Value *IRGuard = SPD.getGuard(); + SDValue GuardPtr = getValue(IRGuard); + SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); + + unsigned Align = + TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + GuardPtr, MachinePointerInfo(IRGuard, 0), + true, false, false, Align); + + SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + StackSlotPtr, + MachinePointerInfo::getFixedStack(FI), + true, false, false, Align); + + // Perform the comparison via a subtract/getsetcc. + EVT VT = Guard.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); + + SDValue Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, VT), + ISD::SETNE); + + // If the sub is not 0, then we know the guard/stackslot do not equal, so + // branch to failure MBB. + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + MVT::Other, StackSlot.getOperand(0), + Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); + // Otherwise branch to success MBB. + SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, BrCond, + DAG.getBasicBlock(SPD.getSuccessMBB())); + + DAG.setRoot(Br); +} + +/// Codegen the failure basic block for a stack protector check. +/// +/// A failure stack protector machine basic block consists simply of a call to +/// __stack_chk_fail(). +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. 
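Taken together, visitSPDescriptorParent above and visitSPDescriptorFailure below emit: load the guard value and the copy saved in the frame, compare them via subtract/setcc, branch to a failure block on mismatch, and have that block do nothing but call the runtime handler. The shape of that control flow in plain C++; the guard symbol and canary value here are assumptions for illustration, with stack_chk_fail standing in for __stack_chk_fail():

#include <cstdint>
#include <cstdlib>

uint64_t global_guard = 0x595e9fbd94fda766ULL;  // example canary value

[[noreturn]] void stack_chk_fail() { std::abort(); }  // failure MBB body

void checkGuard(uint64_t frame_slot_copy) {
  uint64_t guard = global_guard;           // load through the guard pointer
  if (guard - frame_slot_copy != 0)        // Sub + SETNE, as above
    stack_chk_fail();                      // BRCOND to the failure block
  // success MBB: fall through toward the return
}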
+void +SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { + const TargetLowering *TLI = TM.getTargetLowering(); + SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, + MVT::isVoid, 0, 0, false, getCurSDLoc(), + false, false).second; + DAG.setRoot(Chain); +} + /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, @@ -1740,18 +1821,20 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, DAG.getConstant(B.First, VT)); // Check range - SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(Sub.getValueType()), + const TargetLowering *TLI = TM.getTargetLowering(); + SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; - if (!TLI.isTypeLegal(VT)) + if (!TLI->isTypeLegal(VT)) UsePtrType = true; else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) @@ -1763,13 +1846,13 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } } if (UsePtrType) { - VT = TLI.getPointerTy(); - Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT); + VT = TLI->getPointerTy(); + Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); } B.RegVT = VT.getSimpleVT(); B.Reg = FuncInfo.CreateReg(B.RegVT); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), B.Reg, Sub); // Set NextBlock to be the MBB immediately after the current one, if any. @@ -1784,12 +1867,12 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, addSuccessorWithWeight(SwitchBB, B.Default); addSuccessorWithWeight(SwitchBB, MBB); - SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); if (MBB != NextBlock) - BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, + BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, DAG.getBasicBlock(MBB)); DAG.setRoot(BrRange); @@ -1803,35 +1886,36 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, BitTestCase &B, MachineBasicBlock *SwitchBB) { MVT VT = BB.RegVT; - SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), Reg, VT); SDValue Cmp; unsigned PopCount = CountPopulation_64(B.Mask); + const TargetLowering *TLI = TM.getTargetLowering(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. - Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(VT), + Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), + DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. 
- Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(VT), + Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), VT), ShiftOp, DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); } else { // Make desired shift - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, DAG.getConstant(1, VT), ShiftOp); // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), VT, SwitchVal, DAG.getConstant(B.Mask, VT)); - Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(VT), + Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), VT), AndOp, DAG.getConstant(0, VT), ISD::SETNE); } @@ -1841,7 +1925,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); - SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); @@ -1853,7 +1937,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, NextBlock = BBI; if (NextMBB != NextBlock) - BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, + BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); DAG.setRoot(BrAnd); @@ -1885,7 +1969,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { addSuccessorWithWeight(InvokeMBB, LandingPad); // Drop into normal successor. - DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Return))); } @@ -1904,28 +1988,29 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. - if (TLI.getExceptionPointerRegister() == 0 && - TLI.getExceptionSelectorRegister() == 0) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getExceptionPointerRegister() == 0 && + TLI->getExceptionSelectorRegister() == 0) return; SmallVector<EVT, 2> ValueVTs; - ComputeValueVTs(TLI, LP.getType(), ValueVTs); + ComputeValueVTs(*TLI, LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been // copied into virtual registers. SDValue Ops[2]; Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurDebugLoc(), - FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), - getCurDebugLoc(), ValueVTs[0]); + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()), + getCurSDLoc(), ValueVTs[0]); Ops[1] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurDebugLoc(), - FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), - getCurDebugLoc(), ValueVTs[1]); + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()), + getCurSDLoc(), ValueVTs[1]); // Merge into one. 
- SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&ValueVTs[0], ValueVTs.size()), &Ops[0], 2); setValue(&LP, Res); @@ -1979,7 +2064,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, SDValue CondLHS = getValue(SV); EVT VT = CondLHS.getValueType(); - DebugLoc DL = getCurDebugLoc(); + SDLoc DL = getCurSDLoc(); SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, DAG.getConstant(CommonBit, VT)); @@ -2030,12 +2115,11 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, // The last case block won't fall through into 'NextBlock' if we emit the // branches in this order. See if rearranging a case value would help. // We start at the bottom as it's the case with the least weight. - for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){ + for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) if (I->BB == NextBlock) { std::swap(*I, BackCase); break; } - } } // Create a CaseBlock record representing a conditional branch to @@ -2062,7 +2146,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CC = ISD::SETEQ; LHS = SV; RHS = I->High; MHS = NULL; } else { - CC = ISD::SETCC_INVALID; + CC = ISD::SETLE; LHS = I->Low; MHS = SV; RHS = I->High; } @@ -2096,7 +2180,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) { static APInt ComputeRange(const APInt &First, const APInt &Last) { uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth); + APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); return (LastExt - FirstExt + 1ULL); } @@ -2116,7 +2200,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) + const TargetLowering *TLI = TM.getTargetLowering(); + if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2162,7 +2247,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); const APInt &High = cast<ConstantInt>(I->High)->getValue(); - if (Low.ule(TEI) && TEI.ule(High)) { + if (Low.sle(TEI) && TEI.sle(High)) { DestBBs.push_back(I->BB); if (TEI==High) ++I; @@ -2177,7 +2262,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = DestWeights.find(I->BB); - if (Itr != DestWeights.end()) + if (Itr != DestWeights.end()) Itr->second += I->ExtraWeight; else DestWeights[I->BB] = I->ExtraWeight; @@ -2197,7 +2282,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } // Create a jump table index for this jump table. - unsigned JTEncoding = TLI.getJumpTableEncoding(); + unsigned JTEncoding = TLI->getJumpTableEncoding(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) ->createJumpTableIndex(DestBBs); @@ -2217,8 +2302,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock* Default, + MachineBasicBlock* SwitchBB) { // Get the MachineFunction which holds the current MBB. 
This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; @@ -2282,7 +2367,9 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, LSize += J->size(); RSize -= J->size(); } - if (areJTsAllowed(TLI)) { + + const TargetLowering *TLI = TM.getTargetLowering(); + if (areJTsAllowed(*TLI)) { // If our case is dense we *really* should handle it earlier! assert((FMetric > 0) && "Should handle dense range earlier!"); } else { @@ -2334,7 +2421,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to LHS. - CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); if (CR.CaseBB == SwitchBB) visitSwitchCase(CB, SwitchBB); @@ -2351,8 +2438,9 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, MachineBasicBlock* Default, - MachineBasicBlock *SwitchBB){ - EVT PTy = TLI.getPointerTy(); + MachineBasicBlock* SwitchBB) { + const TargetLowering *TLI = TM.getTargetLowering(); + EVT PTy = TLI->getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; @@ -2363,7 +2451,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) + if (!TLI->isOperationLegal(ISD::SHL, PTy)) return false; size_t numCmps = 0; @@ -2406,7 +2494,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, // Optimize the case where all the case values fit in a // word without having to subtract minValue. In this case, // we can optimize away the subtraction. - if (maxValue.ult(IntPtrBits)) { + if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { cmpRange = maxValue; } else { lowBound = minValue; @@ -2481,12 +2569,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, /// Clusterify - Transform simple list of Cases into list of CaseRange's size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const SwitchInst& SI) { - - /// Use a shorter form of declaration, and also - /// show the we want to use CRSBuilder as Clusterifier. - typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier; - - Clusterifier TheClusterifier; + size_t numCmps = 0; BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases @@ -2495,27 +2578,40 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB, - BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); - } - - TheClusterifier.optimize(); - - size_t numCmps = 0; - for (Clusterifier::RangeIterator i = TheClusterifier.begin(), - e = TheClusterifier.end(); i != e; ++i, ++numCmps) { - Clusterifier::Cluster &C = *i; - // Update edge weight for the cluster. - unsigned W = C.first.Weight; - - // FIXME: Currently work with ConstantInt based numbers. - // Changing it to APInt based is a pretty heavy for this commit. 
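The removed IntegersSubsetMapping machinery gives way to the hand-rolled clustering below: sort the cases by value, fold each case into its left neighbor whenever the values are adjacent and the destination block matches, accumulate the branch weights, and finally charge two compares for every surviving range. A self-contained sketch of that loop:

#include <algorithm>
#include <cstdint>
#include <vector>

struct CaseLite { int64_t Low, High; int BB; uint32_t Weight; };  // BB is an id

size_t clusterify(std::vector<CaseLite> &Cases) {
  std::sort(Cases.begin(), Cases.end(),
            [](const CaseLite &A, const CaseLite &B) { return A.Low < B.Low; });
  std::vector<CaseLite> Out;
  for (const CaseLite &C : Cases) {
    if (!Out.empty() && C.Low == Out.back().High + 1 && C.BB == Out.back().BB) {
      Out.back().High = C.High;       // extend the cluster over the neighbor
      Out.back().Weight += C.Weight;  // I->ExtraWeight += J->ExtraWeight
    } else {
      Out.push_back(C);
    }
  }
  size_t NumCmps = 0;
  for (const CaseLite &C : Out)
    NumCmps += (C.Low != C.High) ? 2 : 1;  // a range costs two compares
  Cases = std::move(Out);
  return NumCmps;
}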
@@ -2481,12 +2569,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
 /// Clusterify - Transform simple list of Cases into list of CaseRange's
 size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
                                        const SwitchInst& SI) {
-
-  /// Use a shorter form of declaration, and also
-  /// show the we want to use CRSBuilder as Clusterifier.
-  typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;
-
-  Clusterifier TheClusterifier;
+  size_t numCmps = 0;
 
   BranchProbabilityInfo *BPI = FuncInfo.BPI;
   // Start with "simple" cases
@@ -2495,27 +2578,40 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
     const BasicBlock *SuccBB = i.getCaseSuccessor();
     MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
 
-    TheClusterifier.add(i.getCaseValueEx(), SMBB,
-        BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
-  }
-
-  TheClusterifier.optimize();
-
-  size_t numCmps = 0;
-  for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
-       e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
-    Clusterifier::Cluster &C = *i;
-    // Update edge weight for the cluster.
-    unsigned W = C.first.Weight;
-
-    // FIXME: Currently work with ConstantInt based numbers.
-    // Changing it to APInt based is a pretty heavy for this commit.
-    Cases.push_back(Case(C.first.getLow().toConstantInt(),
-                         C.first.getHigh().toConstantInt(), C.second, W));
-
-    if (C.first.getLow() != C.first.getHigh())
-      // A range counts double, since it requires two compares.
-      ++numCmps;
+    uint32_t ExtraWeight =
+      BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0;
+
+    Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
+                         SMBB, ExtraWeight));
+  }
+  std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge case into clusters
+  if (Cases.size() >= 2)
+    // Must recompute end() each iteration because it may be
+    // invalidated by erase if we hold on to it
+    for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+         J != Cases.end(); ) {
+      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+      MachineBasicBlock* nextBB = J->BB;
+      MachineBasicBlock* currentBB = I->BB;
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
+      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+        I->High = J->High;
+        I->ExtraWeight += J->ExtraWeight;
+        J = Cases.erase(J);
+      } else {
+        I = J++;
+      }
+    }
+
+  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
   }
 
   return numCmps;
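The rewritten Clusterify above is a plain sort-and-merge over single-value cases. The same shape, extracted into a self-contained sketch over a simplified case record (hypothetical types, not the LLVM ones):

    #include <algorithm>
    #include <cstdint>
    #include <iterator>
    #include <vector>

    struct SimpleCase {
      int64_t Low, High;  // inclusive value range
      int Dest;           // successor block id
      uint32_t Weight;    // accumulated branch weight
    };

    // Sort by Low, then merge a neighbor that is contiguous with the current
    // case and branches to the same destination, mirroring the erase-based
    // loop in the hunk above.
    void clusterify(std::vector<SimpleCase> &Cases) {
      std::sort(Cases.begin(), Cases.end(),
                [](const SimpleCase &A, const SimpleCase &B) {
                  return A.Low < B.Low;
                });
      for (auto I = Cases.begin();
           I != Cases.end() && std::next(I) != Cases.end(); ) {
        auto J = std::next(I);
        if (J->Low - I->High == 1 && I->Dest == J->Dest) {
          I->High = J->High;
          I->Weight += J->Weight;
          Cases.erase(J);  // I stays valid; re-check its new neighbor
        } else {
          ++I;
        }
      }
    }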
@@ -2549,7 +2645,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
     // If this is not a fall-through branch, emit the branch.
     SwitchMBB->addSuccessor(Default);
     if (Default != NextBlock)
-      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
                               MVT::Other, getControlRoot(),
                               DAG.getBasicBlock(Default)));
@@ -2616,7 +2712,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
     addSuccessorWithWeight(IndirectBrMBB, Succ);
   }
 
-  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
+  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
                           MVT::Other, getControlRoot(),
                           getValue(I.getAddress())));
 }
@@ -2627,7 +2723,7 @@ void SelectionDAGBuilder::visitFSub(const User &I) {
   if (isa<Constant>(I.getOperand(0)) &&
       I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
     SDValue Op2 = getValue(I.getOperand(1));
-    setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+    setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
                              Op2.getValueType(), Op2));
     return;
   }
@@ -2638,7 +2734,7 @@ void SelectionDAGBuilder::visitFSub(const User &I) {
 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
-  setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+  setValue(&I, DAG.getNode(OpCode, getCurSDLoc(),
                            Op1.getValueType(), Op1, Op2));
 }
@@ -2646,13 +2742,13 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
 
-  EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+  EVT ShiftTy = TM.getTargetLowering()->getShiftAmountTy(Op2.getValueType());
 
   // Coerce the shift amount to the right type if we can.
   if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
     unsigned ShiftSize = ShiftTy.getSizeInBits();
     unsigned Op2Size = Op2.getValueType().getSizeInBits();
-    DebugLoc DL = getCurDebugLoc();
+    SDLoc DL = getCurSDLoc();
 
     // If the operand is smaller than the shift count type, promote it.
     if (ShiftSize > Op2Size)
@@ -2670,7 +2766,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
       Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
   }
 
-  setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+  setValue(&I, DAG.getNode(Opcode, getCurSDLoc(),
                            Op1.getValueType(), Op1, Op2));
 }
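visitShift above reconciles the IR operand's type with the target's preferred shift-amount type: widen when the operand is narrower, truncate when it is wider (safe, because an in-range shift amount always fits in the narrower type). A scalar model of that decision, under those assumptions (sketch, not the DAG code):

    #include <cstdint>

    // A valid shift amount for an N-bit value is < N, so it survives
    // truncation to any type that can represent N-1; widening never
    // changes the value at all.
    uint64_t coerceShiftAmount(uint64_t Amt, unsigned FromBits,
                               unsigned ToBits) {
      if (ToBits >= FromBits)
        return Amt;                                  // promote: unchanged
      return Amt & ((uint64_t(1) << ToBits) - 1);    // truncate to ToBits
    }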
@@ -2684,9 +2780,10 @@ void SelectionDAGBuilder::visitSDiv(const User &I) {
   if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
       !isa<ConstantSDNode>(Op1) &&
       isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
-    setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
+    setValue(&I, TM.getTargetLowering()->BuildExactSDIV(Op1, Op2,
+                                                        getCurSDLoc(), DAG));
   else
-    setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
+    setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(),
                              Op1, Op2));
 }
@@ -2700,8 +2797,8 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
   SDValue Op2 = getValue(I.getOperand(1));
   ISD::CondCode Opcode = getICmpCondCode(predicate);
 
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
 }
 
 void SelectionDAGBuilder::visitFCmp(const User &I) {
@@ -2715,13 +2812,13 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
   ISD::CondCode Condition = getFCmpCondCode(predicate);
   if (TM.Options.NoNaNsFPMath)
     Condition = getFCmpCodeWithoutNaN(Condition);
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
 }
 
 void SelectionDAGBuilder::visitSelect(const User &I) {
   SmallVector<EVT, 4> ValueVTs;
-  ComputeValueVTs(TLI, I.getType(), ValueVTs);
+  ComputeValueVTs(*TM.getTargetLowering(), I.getType(), ValueVTs);
   unsigned NumValues = ValueVTs.size();
   if (NumValues == 0) return;
@@ -2733,7 +2830,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
     ISD::VSELECT : ISD::SELECT;
 
   for (unsigned i = 0; i != NumValues; ++i)
-    Values[i] = DAG.getNode(OpCode, getCurDebugLoc(),
+    Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
                             TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
                             Cond,
                             SDValue(TrueVal.getNode(),
@@ -2741,7 +2838,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
                             SDValue(FalseVal.getNode(),
                                     FalseVal.getResNo() + i));
 
-  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                            DAG.getVTList(&ValueVTs[0], NumValues),
                            &Values[0], NumValues));
 }
@@ -2749,117 +2846,134 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
 void SelectionDAGBuilder::visitTrunc(const User &I) {
   // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
 }
 
 void SelectionDAGBuilder::visitZExt(const User &I) {
   // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
   // ZExt also can't be a cast to bool for same reason. So, nothing much to do
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
 }
 
 void SelectionDAGBuilder::visitSExt(const User &I) {
   // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
   // SExt also can't be a cast to bool for same reason. So, nothing much to do
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
 }
 
 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
   // FPTrunc is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+  const TargetLowering *TLI = TM.getTargetLowering();
+  EVT DestVT = TLI->getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(),
                            DestVT, N,
-                           DAG.getTargetConstant(0, TLI.getPointerTy())));
+                           DAG.getTargetConstant(0, TLI->getPointerTy())));
 }
 
-void SelectionDAGBuilder::visitFPExt(const User &I){
+void SelectionDAGBuilder::visitFPExt(const User &I) {
   // FPExt is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
 }
 
 void SelectionDAGBuilder::visitFPToUI(const User &I) {
   // FPToUI is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
 }
 
 void SelectionDAGBuilder::visitFPToSI(const User &I) {
   // FPToSI is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
 }
 
 void SelectionDAGBuilder::visitUIToFP(const User &I) {
   // UIToFP is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
 }
 
-void SelectionDAGBuilder::visitSIToFP(const User &I){
+void SelectionDAGBuilder::visitSIToFP(const User &I) {
   // SIToFP is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
 }
 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
   // What to do depends on the size of the integer and the size of the pointer.
   // We can either truncate, zero extend, or no-op, accordingly.
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
 }
 
 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
   // What to do depends on the size of the integer and the size of the pointer.
   // We can either truncate, zero extend, or no-op, accordingly.
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
 }
 
 void SelectionDAGBuilder::visitBitCast(const User &I) {
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
 
   // BitCast assures us that source and destination are the same size so this is
   // either a BITCAST or a no-op.
   if (DestVT != N.getValueType())
-    setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+    setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(),
                              DestVT, N)); // convert types.
   else
     setValue(&I, N);            // noop cast.
 }
 
+void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const Value *SV = I.getOperand(0);
+  SDValue N = getValue(SV);
+  EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+
+  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
+  unsigned DestAS = I.getType()->getPointerAddressSpace();
+
+  if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+    N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
+
+  setValue(&I, N);
+}
+
 void SelectionDAGBuilder::visitInsertElement(const User &I) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue InVec = getValue(I.getOperand(0));
   SDValue InVal = getValue(I.getOperand(1));
-  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
-                              TLI.getPointerTy(),
-                              getValue(I.getOperand(2)));
-  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
-                           TLI.getValueType(I.getType()),
+  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)),
+                                     getCurSDLoc(), TLI.getVectorIdxTy());
+  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
+                           TM.getTargetLowering()->getValueType(I.getType()),
                            InVec, InVal, InIdx));
 }
 
 void SelectionDAGBuilder::visitExtractElement(const User &I) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue InVec = getValue(I.getOperand(0));
-  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
-                              TLI.getPointerTy(),
-                              getValue(I.getOperand(1)));
-  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
-                           TLI.getValueType(I.getType()), InVec, InIdx));
+  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)),
+                                     getCurSDLoc(), TLI.getVectorIdxTy());
+  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
+                           TM.getTargetLowering()->getValueType(I.getType()),
+                           InVec, InIdx));
 }
 
 // Utility for visitShuffleVector - Return true if every element in Mask,
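visitInsertElement and visitExtractElement above stop zero-extending the element index to the pointer type and instead sign-extend-or-truncate it to the target's dedicated vector-index type. A scalar model of that sext-or-trunc adjustment (widths are hypothetical; this is not the DAG helper itself):

    #include <cstdint>

    // Treat Idx as a FromBits-wide value: first sign-extend it to 64 bits,
    // then truncate to IdxBits if the index type is narrower.
    uint64_t sextOrTrunc(uint64_t Idx, unsigned FromBits, unsigned IdxBits) {
      uint64_t SignBit = uint64_t(1) << (FromBits - 1);
      uint64_t Ext = (Idx ^ SignBit) - SignBit;      // sign-extension trick
      if (IdxBits < 64)
        Ext &= (uint64_t(1) << IdxBits) - 1;         // truncation
      return Ext;
    }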
@@ -2880,13 +2994,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
   SmallVector<int, 8> Mask;
   ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
   unsigned MaskNumElts = Mask.size();
-
-  EVT VT = TLI.getValueType(I.getType());
+
+  const TargetLowering *TLI = TM.getTargetLowering();
+  EVT VT = TLI->getValueType(I.getType());
   EVT SrcVT = Src1.getValueType();
   unsigned SrcNumElts = SrcVT.getVectorNumElements();
 
   if (SrcNumElts == MaskNumElts) {
-    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+    setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
                                       &Mask[0]));
     return;
   }
@@ -2901,7 +3016,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
         isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
       // The shuffle is concatenating two vectors together.
-      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
                                VT, Src1, Src2));
       return;
     }
@@ -2909,7 +3024,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
         isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
      // The shuffle is concatenating two vectors together.
-      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
                                VT, Src2, Src1));
       return;
     }
@@ -2927,10 +3042,10 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     MOps2[0] = Src2;
 
     Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
-                                                  getCurDebugLoc(), VT,
+                                                  getCurSDLoc(), VT,
                                                   &MOps1[0], NumConcat);
     Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
-                                                  getCurDebugLoc(), VT,
+                                                  getCurSDLoc(), VT,
                                                   &MOps2[0], NumConcat);
 
     // Readjust mask for new input vector length.
@@ -2942,7 +3057,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
       MappedOps.push_back(Idx);
     }
 
-    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+    setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
                                       &MappedOps[0]));
     return;
   }
@@ -3002,8 +3117,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
       if (RangeUse[Input] == 0)
        Src = DAG.getUNDEF(VT);
      else
-        Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
-                          Src, DAG.getIntPtrConstant(StartIdx[Input]));
+        Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT,
+                          Src, DAG.getConstant(StartIdx[Input],
+                                               TLI->getVectorIdxTy()));
     }
 
     // Calculate new mask.
@@ -3019,7 +3135,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
       MappedOps.push_back(Idx);
     }
 
-    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+    setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
                                       &MappedOps[0]));
     return;
   }
@@ -3029,7 +3145,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
   // replacing the shuffle with extract and build vector.
   // to insert and build vector.
   EVT EltVT = VT.getVectorElementType();
-  EVT PtrVT = TLI.getPointerTy();
+  EVT IdxVT = TLI->getVectorIdxTy();
   SmallVector<SDValue,8> Ops;
   for (unsigned i = 0; i != MaskNumElts; ++i) {
     int Idx = Mask[i];
@@ -3041,14 +3157,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
       SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
 
       if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
-      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
-                        EltVT, Src, DAG.getConstant(Idx, PtrVT));
+      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
+                        EltVT, Src, DAG.getConstant(Idx, IdxVT));
     }
 
     Ops.push_back(Res);
   }
 
-  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(),
                            VT, &Ops[0], Ops.size()));
 }
@@ -3062,10 +3178,11 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
   unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
 
+  const TargetLowering *TLI = TM.getTargetLowering();
   SmallVector<EVT, 4> AggValueVTs;
-  ComputeValueVTs(TLI, AggTy, AggValueVTs);
+  ComputeValueVTs(*TLI, AggTy, AggValueVTs);
   SmallVector<EVT, 4> ValValueVTs;
-  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+  ComputeValueVTs(*TLI, ValTy, ValValueVTs);
 
   unsigned NumAggValues = AggValueVTs.size();
   unsigned NumValValues = ValValueVTs.size();
@@ -3089,7 +3206,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
     Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                 SDValue(Agg.getNode(), Agg.getResNo() + i);
 
-  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                            DAG.getVTList(&AggValueVTs[0], NumAggValues),
                            &Values[0], NumAggValues));
 }
@@ -3102,8 +3219,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
   unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
 
+  const TargetLowering *TLI = TM.getTargetLowering();
   SmallVector<EVT, 4> ValValueVTs;
-  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+  ComputeValueVTs(*TLI, ValTy, ValValueVTs);
 
   unsigned NumValValues = ValValueVTs.size();
@@ -3123,16 +3241,18 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
                 DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
                 SDValue(Agg.getNode(), Agg.getResNo() + i);
 
-  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                            DAG.getVTList(&ValValueVTs[0], NumValValues),
                            &Values[0], NumValValues));
 }
 
 void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
-  SDValue N = getValue(I.getOperand(0));
+  Value *Op0 = I.getOperand(0);
   // Note that the pointer operand may be a vector of pointers. Take the scalar
   // element which holds a pointer.
-  Type *Ty = I.getOperand(0)->getType()->getScalarType();
+  Type *Ty = Op0->getType()->getScalarType();
+  unsigned AS = Ty->getPointerAddressSpace();
+  SDValue N = getValue(Op0);
 
   for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
        OI != E; ++OI) {
@@ -3142,7 +3262,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       if (Field) {
         // N = N + Offset
         uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
-        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+        N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
                         DAG.getConstant(Offset, N.getValueType()));
       }
@@ -3151,50 +3271,50 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       Ty = cast<SequentialType>(Ty)->getElementType();
 
       // If this is a constant subscript, handle it quickly.
+      const TargetLowering *TLI = TM.getTargetLowering();
       if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
         if (CI->isZero()) continue;
         uint64_t Offs =
             TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
         SDValue OffsVal;
-        EVT PTy = TLI.getPointerTy();
+        EVT PTy = TLI->getPointerTy(AS);
         unsigned PtrBits = PTy.getSizeInBits();
         if (PtrBits < 64)
-          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
-                                TLI.getPointerTy(),
+          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
                                 DAG.getConstant(Offs, MVT::i64));
         else
-          OffsVal = DAG.getIntPtrConstant(Offs);
+          OffsVal = DAG.getConstant(Offs, PTy);
 
-        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+        N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
                         OffsVal);
         continue;
       }
 
       // N = N + Idx * ElementSize;
-      APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
+      APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
                                 TD->getTypeAllocSize(Ty));
       SDValue IdxN = getValue(Idx);
 
       // If the index is smaller or larger than intptr_t, truncate or extend
       // it.
-      IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
+      IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType());
 
       // If this is a multiply by a power of two, turn it into a shl
       // immediately.  This is a very common case.
       if (ElementSize != 1) {
         if (ElementSize.isPowerOf2()) {
           unsigned Amt = ElementSize.logBase2();
-          IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+          IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(),
                              N.getValueType(), IdxN,
                              DAG.getConstant(Amt, IdxN.getValueType()));
         } else {
           SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType());
-          IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+          IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(),
                              N.getValueType(), IdxN, Scale);
         }
       }
 
-      N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+      N = DAG.getNode(ISD::ADD, getCurSDLoc(),
                       N.getValueType(), N, IdxN);
     }
   }
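The GEP lowering above folds constant subscripts directly into the pointer and otherwise multiplies the index by the element's allocation size, strength-reducing a power-of-two size into a shift. The scalar shape of that address computation (standalone sketch):

    #include <cstdint>

    // addr = base + idx * elemSize, with elemSize == 1 elided and a
    // power-of-two elemSize becoming a shift, as in visitGetElementPtr.
    uint64_t gepAddress(uint64_t Base, int64_t Idx, uint64_t ElemSize) {
      if (ElemSize == 1)
        return Base + static_cast<uint64_t>(Idx);
      if ((ElemSize & (ElemSize - 1)) == 0) {        // power of two
        unsigned Amt = 0;
        while ((uint64_t(1) << Amt) != ElemSize)
          ++Amt;                                     // logBase2
        return Base + (static_cast<uint64_t>(Idx) << Amt);
      }
      return Base + static_cast<uint64_t>(Idx) * ElemSize;
    }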
@@ -3209,18 +3329,19 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
     return;   // getValue will auto-populate this.
 
   Type *Ty = I.getAllocatedType();
-  uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+  const TargetLowering *TLI = TM.getTargetLowering();
+  uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
   unsigned Align =
-    std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
+    std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
              I.getAlignment());
 
   SDValue AllocSize = getValue(I.getArraySize());
 
-  EVT IntPtr = TLI.getPointerTy();
+  EVT IntPtr = TLI->getPointerTy();
   if (AllocSize.getValueType() != IntPtr)
-    AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+    AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr);
 
-  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
+  AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr,
                           AllocSize,
                           DAG.getConstant(TySize, IntPtr));
@@ -3233,18 +3354,18 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
 
   // Round the size of the allocation up to the stack alignment size
   // by add SA-1 to the size.
-  AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+  AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(),
                           AllocSize.getValueType(), AllocSize,
                           DAG.getIntPtrConstant(StackAlign-1));
 
   // Mask out the low bits for alignment purposes.
-  AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+  AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(),
                           AllocSize.getValueType(), AllocSize,
                           DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
 
   SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
   SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
-  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(),
                             VTs, Ops, 3);
   setValue(&I, DSA);
   DAG.setRoot(DSA.getValue(1));
@@ -3272,7 +3393,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   SmallVector<EVT, 4> ValueVTs;
   SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+  ComputeValueVTs(*TM.getTargetLowering(), Ty, ValueVTs, &Offsets);
   unsigned NumValues = ValueVTs.size();
   if (NumValues == 0)
     return;
@@ -3306,15 +3427,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
     // (MaxParallelChains should always remain as failsafe).
     if (ChainI == MaxParallelChains) {
       assert(PendingLoads.empty() && "PendingLoads must be serialized first");
-      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                                   MVT::Other, &Chains[0], ChainI);
       Root = Chain;
       ChainI = 0;
     }
-    SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+    SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(),
                             PtrVT, Ptr,
                             DAG.getConstant(Offsets[i], PtrVT));
-    SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+    SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root,
                             A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
                             isNonTemporal, isInvariant, Alignment, TBAAInfo,
                             Ranges);
@@ -3324,7 +3445,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   }
 
   if (!ConstantMemory) {
-    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                                 MVT::Other, &Chains[0], ChainI);
     if (isVolatile)
       DAG.setRoot(Chain);
@@ -3332,7 +3453,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
       PendingLoads.push_back(Chain);
   }
 
-  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                            DAG.getVTList(&ValueVTs[0], NumValues),
                            &Values[0], NumValues));
 }
@@ -3346,7 +3467,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   SmallVector<EVT, 4> ValueVTs;
   SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+  ComputeValueVTs(*TM.getTargetLowering(), SrcV->getType(), ValueVTs, &Offsets);
   unsigned NumValues = ValueVTs.size();
   if (NumValues == 0)
     return;
@@ -3370,30 +3491,28 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
     // See visitLoad comments.
     if (ChainI == MaxParallelChains) {
-      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                                   MVT::Other, &Chains[0], ChainI);
       Root = Chain;
       ChainI = 0;
     }
-    SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
+    SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr,
                               DAG.getConstant(Offsets[i], PtrVT));
-    SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+    SDValue St = DAG.getStore(Root, getCurSDLoc(),
                               SDValue(Src.getNode(), Src.getResNo() + i),
                               Add, MachinePointerInfo(PtrV, Offsets[i]),
                               isVolatile, isNonTemporal, Alignment, TBAAInfo);
     Chains[ChainI] = St;
   }
 
-  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                                   MVT::Other, &Chains[0], ChainI);
-  ++SDNodeOrder;
-  AssignOrderingToNode(StoreNode.getNode());
   DAG.setRoot(StoreNode);
 }
 
 static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
                                     SynchronizationScope Scope,
-                                    bool Before, DebugLoc dl,
+                                    bool Before, SDLoc dl,
                                     SelectionDAG &DAG,
                                     const TargetLowering &TLI) {
   // Fence, if necessary
@@ -3416,39 +3535,40 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
 }
 
 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
-  DebugLoc dl = getCurDebugLoc();
+  SDLoc dl = getCurSDLoc();
   AtomicOrdering Order = I.getOrdering();
   SynchronizationScope Scope = I.getSynchScope();
 
   SDValue InChain = getRoot();
 
-  if (TLI.getInsertFencesForAtomic())
+  const TargetLowering *TLI = TM.getTargetLowering();
+  if (TLI->getInsertFencesForAtomic())
     InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
-                                   DAG, TLI);
+                                   DAG, *TLI);
 
   SDValue L =
     DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
-                  getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
+                  getValue(I.getCompareOperand()).getSimpleValueType(),
                  InChain,
                  getValue(I.getPointerOperand()),
                  getValue(I.getCompareOperand()),
                  getValue(I.getNewValOperand()),
                  MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
-                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
                  Scope);
 
   SDValue OutChain = L.getValue(1);
 
-  if (TLI.getInsertFencesForAtomic())
+  if (TLI->getInsertFencesForAtomic())
     OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
-                                    DAG, TLI);
+                                    DAG, *TLI);
 
   setValue(&I, L);
   DAG.setRoot(OutChain);
 }
 
 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
-  DebugLoc dl = getCurDebugLoc();
+  SDLoc dl = getCurSDLoc();
   ISD::NodeType NT;
   switch (I.getOperation()) {
   default: llvm_unreachable("Unknown atomicrmw operation");
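For targets that return true from getInsertFencesForAtomic(), the code above demotes the atomic operation itself to monotonic ordering and brackets it with explicit fences. The bracketing pattern in miniature (a sketch of the idea; the names and the exact ordering sets are assumptions, not the LLVM helper):

    enum Ordering { Monotonic, Acquire, Release, AcquireRelease, SeqCst };

    // Release-side fence before the operation, acquire-side fence after it,
    // with the operation itself issued monotonic.
    template <typename FenceFn, typename AtomicFn>
    void bracketedAtomic(Ordering Ord, FenceFn Fence, AtomicFn Atomic) {
      if (Ord == Release || Ord == AcquireRelease || Ord == SeqCst)
        Fence(/*before=*/true);
      Atomic(Monotonic);              // demoted operation
      if (Ord == Acquire || Ord == AcquireRelease || Ord == SeqCst)
        Fence(/*before=*/false);
    }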
@@ -3469,47 +3589,50 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
 
   SDValue InChain = getRoot();
 
-  if (TLI.getInsertFencesForAtomic())
+  const TargetLowering *TLI = TM.getTargetLowering();
+  if (TLI->getInsertFencesForAtomic())
     InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
-                                   DAG, TLI);
+                                   DAG, *TLI);
 
   SDValue L =
     DAG.getAtomic(NT, dl,
-                  getValue(I.getValOperand()).getValueType().getSimpleVT(),
+                  getValue(I.getValOperand()).getSimpleValueType(),
                   InChain,
                   getValue(I.getPointerOperand()),
                   getValue(I.getValOperand()),
                   I.getPointerOperand(), 0 /* Alignment */,
-                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
                   Scope);
 
   SDValue OutChain = L.getValue(1);
 
-  if (TLI.getInsertFencesForAtomic())
+  if (TLI->getInsertFencesForAtomic())
     OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
-                                    DAG, TLI);
+                                    DAG, *TLI);
 
   setValue(&I, L);
   DAG.setRoot(OutChain);
 }
 
 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
-  DebugLoc dl = getCurDebugLoc();
+  SDLoc dl = getCurSDLoc();
+  const TargetLowering *TLI = TM.getTargetLowering();
   SDValue Ops[3];
   Ops[0] = getRoot();
-  Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
-  Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
+  Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy());
+  Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy());
   DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3));
 }
 
 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
-  DebugLoc dl = getCurDebugLoc();
+  SDLoc dl = getCurSDLoc();
   AtomicOrdering Order = I.getOrdering();
   SynchronizationScope Scope = I.getSynchScope();
 
   SDValue InChain = getRoot();
 
-  EVT VT = TLI.getValueType(I.getType());
+  const TargetLowering *TLI = TM.getTargetLowering();
+  EVT VT = TLI->getValueType(I.getType());
 
   if (I.getAlignment() < VT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic load");
@@ -3518,35 +3641,36 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
     DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT,
                   InChain,
                   getValue(I.getPointerOperand()),
                   I.getPointerOperand(), I.getAlignment(),
-                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
                   Scope);
 
   SDValue OutChain = L.getValue(1);
 
-  if (TLI.getInsertFencesForAtomic())
+  if (TLI->getInsertFencesForAtomic())
     OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
-                                    DAG, TLI);
+                                    DAG, *TLI);
 
   setValue(&I, L);
   DAG.setRoot(OutChain);
 }
 
 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
-  DebugLoc dl = getCurDebugLoc();
+  SDLoc dl = getCurSDLoc();
   AtomicOrdering Order = I.getOrdering();
   SynchronizationScope Scope = I.getSynchScope();
 
   SDValue InChain = getRoot();
 
-  EVT VT = TLI.getValueType(I.getValueOperand()->getType());
+  const TargetLowering *TLI = TM.getTargetLowering();
+  EVT VT = TLI->getValueType(I.getValueOperand()->getType());
 
   if (I.getAlignment() < VT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic store");
 
-  if (TLI.getInsertFencesForAtomic())
+  if (TLI->getInsertFencesForAtomic())
     InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
-                                   DAG, TLI);
+                                   DAG, *TLI);
 
   SDValue OutChain =
     DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
@@ -3554,12 +3678,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
                   getValue(I.getPointerOperand()),
                   getValue(I.getValueOperand()),
                   I.getPointerOperand(), I.getAlignment(),
-                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
                   Scope);
 
-  if (TLI.getInsertFencesForAtomic())
+  if (TLI->getInsertFencesForAtomic())
     OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
-                                    DAG, TLI);
+                                    DAG, *TLI);
 
   DAG.setRoot(OutChain);
 }
@@ -3584,12 +3708,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
 
   // Info is set by getTgtMemInstrinsic
   TargetLowering::IntrinsicInfo Info;
-  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+  const TargetLowering *TLI = TM.getTargetLowering();
+  bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic);
 
   // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
   if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
       Info.opc == ISD::INTRINSIC_W_CHAIN)
-    Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
+    Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy()));
 
   // Add all operands of the call to the operand list.
   for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
@@ -3598,7 +3723,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   }
 
   SmallVector<EVT, 4> ValueVTs;
-  ComputeValueVTs(TLI, I.getType(), ValueVTs);
+  ComputeValueVTs(*TLI, I.getType(), ValueVTs);
 
   if (HasChain)
     ValueVTs.push_back(MVT::Other);
@@ -3609,20 +3734,20 @@
   SDValue Result;
   if (IsTgtIntrinsic) {
     // This is target intrinsic that touches memory
-    Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
                                      VTs, &Ops[0], Ops.size(),
                                      Info.memVT,
                                      MachinePointerInfo(Info.ptrVal, Info.offset),
                                      Info.align, Info.vol,
                                      Info.readMem, Info.writeMem);
   } else if (!HasChain) {
-    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(),
                          VTs, &Ops[0], Ops.size());
   } else if (!I.getType()->isVoidTy()) {
-    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(),
                          VTs, &Ops[0], Ops.size());
   } else {
-    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(),
                          VTs, &Ops[0], Ops.size());
   }
@@ -3636,17 +3761,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
 
   if (!I.getType()->isVoidTy()) {
     if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
-      EVT VT = TLI.getValueType(PTy);
-      Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
+      EVT VT = TLI->getValueType(PTy);
+      Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
     }
 
     setValue(&I, Result);
-  } else {
-    // Assign order to result here. If the intrinsic does not produce a result,
-    // it won't be mapped to a SDNode and visit() will not assign it an order
-    // number.
-    ++SDNodeOrder;
-    AssignOrderingToNode(Result.getNode());
   }
 }
@@ -3657,7 +3776,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
 ///
 /// where Op is the hexadecimal representation of floating point value.
 static SDValue
-GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) {
   SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                            DAG.getConstant(0x007fffff, MVT::i32));
   SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
@@ -3672,7 +3791,7 @@
 /// where Op is the hexadecimal representation of floating point value.
 static SDValue
 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
-            DebugLoc dl) {
+            SDLoc dl) {
   SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                            DAG.getConstant(0x7f800000, MVT::i32));
   SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
@@ -3691,7 +3810,7 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
 
 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
 /// limited-precision mode.
-static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
   if (Op.getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -3794,7 +3913,7 @@ static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
 
 /// expandLog - Lower a log intrinsic. Handles the special sequences for
 /// limited-precision mode.
-static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
   if (Op.getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -3890,7 +4009,7 @@ static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
 
 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
 /// limited-precision mode.
-static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI) {
   if (Op.getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -3985,7 +4104,7 @@ static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
 
 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
 /// limited-precision mode.
-static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                            const TargetLowering &TLI) {
   if (Op.getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -4073,7 +4192,7 @@ static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
 
 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
 /// limited-precision mode.
-static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI) {
   if (Op.getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -4168,10 +4287,10 @@ static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
 
 /// visitPow - Lower a pow intrinsic. Handles the special sequences for
 /// limited-precision mode with x == 10.0f.
-static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS,
+static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
                          SelectionDAG &DAG, const TargetLowering &TLI) {
   bool IsExp10 = false;
-  if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 &&
+  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
     if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
       APFloat Ten(10.0f);
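ExpandPowI below expands llvm.powi with a constant exponent into a multiplication tree rather than a libcall. The recurrence it builds is the classic square-and-multiply scheme, shown here in scalar form (illustrative only, not the DAG code):

    // powi(x, n) with O(log n) multiplies: square the running factor and
    // multiply it in wherever the exponent has a set bit.
    double powi(double X, int N) {
      bool Neg = N < 0;
      unsigned long long E = Neg ? -(long long)N : (long long)N;
      double Result = 1.0, Cur = X;
      while (E) {
        if (E & 1)
          Result *= Cur;
        Cur *= Cur;
        E >>= 1;
      }
      return Neg ? 1.0 / Result : Result;
    }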
@@ -4276,7 +4395,7 @@ static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS,
 
 /// ExpandPowI - Expand a llvm.powi intrinsic.
-static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
+static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
                           SelectionDAG &DAG) {
   // If RHS is a constant, we can expand this out to a multiplication tree,
   // otherwise we end up lowering to a call to __powidf2 (for example).  When
@@ -4335,7 +4454,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) {
     return 0;
 
   const SDValue &Ext = N.getOperand(0);
-  if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){
+  if (Ext.getOpcode() == ISD::AssertZext ||
+      Ext.getOpcode() == ISD::AssertSext) {
     const SDValue &CFR = Ext.getOperand(0);
     if (CFR.getOpcode() == ISD::CopyFromReg)
       return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
@@ -4358,20 +4478,19 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
 
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
-  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
 
   // Ignore inlined function arguments here.
   DIVariable DV(Variable);
   if (DV.isInlinedFnArgument(MF.getFunction()))
     return false;
 
-  unsigned Reg = 0;
+  Optional<MachineOperand> Op;
   // Some arguments' frame index is recorded during argument lowering.
-  Offset = FuncInfo.getArgumentFrameIndex(Arg);
-  if (Offset)
-    Reg = TRI->getFrameRegister(MF);
+  if (int FI = FuncInfo.getArgumentFrameIndex(Arg))
+    Op = MachineOperand::CreateFI(FI);
 
-  if (!Reg && N.getNode()) {
+  if (!Op && N.getNode()) {
+    unsigned Reg;
     if (N.getOpcode() == ISD::CopyFromReg)
       Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
     else
@@ -4382,32 +4501,39 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
       if (PR)
         Reg = PR;
     }
+    if (Reg)
+      Op = MachineOperand::CreateReg(Reg, false);
   }
 
-  if (!Reg) {
+  if (!Op) {
     // Check if ValueMap has reg number.
     DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
     if (VMI != FuncInfo.ValueMap.end())
-      Reg = VMI->second;
+      Op = MachineOperand::CreateReg(VMI->second, false);
   }
 
-  if (!Reg && N.getNode()) {
+  if (!Op && N.getNode())
     // Check if frame index is available.
     if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
       if (FrameIndexSDNode *FINode =
-          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
-        Reg = TRI->getFrameRegister(MF);
-        Offset = FINode->getIndex();
-      }
-  }
+          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+        Op = MachineOperand::CreateFI(FINode->getIndex());
 
-  if (!Reg)
+  if (!Op)
     return false;
 
-  MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
-                                    TII->get(TargetOpcode::DBG_VALUE))
-    .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
-  FuncInfo.ArgDbgValues.push_back(&*MIB);
+  // FIXME: This does not handle register-indirect values at offset 0.
+  bool IsIndirect = Offset != 0;
+  if (Op->isReg())
+    FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(),
+                                            TII->get(TargetOpcode::DBG_VALUE),
+                                            IsIndirect,
+                                            Op->getReg(), Offset, Variable));
+  else
+    FuncInfo.ArgDbgValues.push_back(
+      BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE))
+          .addOperand(*Op).addImm(Offset).addMetadata(Variable));
+
   return true;
 }
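The EmitFuncArgumentDbgValue rewrite above replaces the old Reg/Offset pair with an Optional<MachineOperand>, making "no location found yet" explicit instead of encoding it as register 0. The pattern in miniature, reusing llvm::Optional (the values and sentinels here are hypothetical):

    #include "llvm/ADT/Optional.h"

    // The first source that yields a location wins; callers test the
    // Optional itself rather than comparing a sentinel against zero.
    llvm::Optional<int> findLocation(int FrameIdx, unsigned VReg) {
      llvm::Optional<int> Loc;
      if (FrameIdx != -1)
        Loc = FrameIdx;             // frame-index location
      if (!Loc && VReg != 0)
        Loc = (int)VReg;            // register location
      return Loc;                   // may still be empty
    }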
@@ -4424,6 +4550,8 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
 /// otherwise lower it and return null.
 const char *
 SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+  const TargetLowering *TLI = TM.getTargetLowering();
+  SDLoc sdl = getCurSDLoc();
   DebugLoc dl = getCurDebugLoc();
   SDValue Res;
@@ -4436,17 +4564,17 @@
   case Intrinsic::vaend:    visitVAEnd(I); return 0;
   case Intrinsic::vacopy:   visitVACopy(I); return 0;
   case Intrinsic::returnaddress:
-    setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
+    setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(),
                              getValue(I.getArgOperand(0))));
     return 0;
   case Intrinsic::frameaddress:
-    setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
+    setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(),
                              getValue(I.getArgOperand(0))));
     return 0;
   case Intrinsic::setjmp:
-    return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
+    return &"_setjmp"[!TLI->usesUnderscoreSetJmp()];
   case Intrinsic::longjmp:
-    return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
+    return &"_longjmp"[!TLI->usesUnderscoreLongJmp()];
   case Intrinsic::memcpy: {
     // Assert for address < 256 since we support only user defined address
     // spaces.
@@ -4462,7 +4590,7 @@
     if (!Align)
       Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
-    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
+    DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false,
                               MachinePointerInfo(I.getArgOperand(0)),
                               MachinePointerInfo(I.getArgOperand(1))));
     return 0;
@@ -4480,7 +4608,7 @@
     if (!Align)
       Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
-    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+    DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
                               MachinePointerInfo(I.getArgOperand(0))));
     return 0;
   }
@@ -4499,7 +4627,7 @@
     if (!Align)
       Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
-    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+    DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
                                MachinePointerInfo(I.getArgOperand(0)),
                                MachinePointerInfo(I.getArgOperand(1))));
     return 0;
@@ -4508,17 +4636,14 @@
     const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
     MDNode *Variable = DI.getVariable();
     const Value *Address = DI.getAddress();
-    if (!Address || !DIVariable(Variable).Verify()) {
+    DIVariable DIVar(Variable);
+    assert((!DIVar || DIVar.isVariable()) &&
+           "Variable in DbgDeclareInst should be either null or a DIVariable.");
+    if (!Address || !DIVar) {
       DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
       return 0;
     }
 
-    // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
-    // but do not always have a corresponding SDNode built.  The SDNodeOrder
-    // absolute, but not relative, values are different depending on whether
-    // debug info exists.
-    ++SDNodeOrder;
-
     // Check if address has undef value.
     if (isa<UndefValue>(Address) ||
         (Address->use_empty() && !isa<Argument>(Address))) {
@@ -4589,7 +4714,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   }
   case Intrinsic::dbg_value: {
     const DbgValueInst &DI = cast<DbgValueInst>(I);
-    if (!DIVariable(DI.getVariable()).Verify())
+    DIVariable DIVar(DI.getVariable());
+    assert((!DIVar || DIVar.isVariable()) &&
+           "Variable in DbgValueInst should be either null or a DIVariable.");
+    if (!DIVar)
       return 0;
 
     MDNode *Variable = DI.getVariable();
@@ -4598,11 +4726,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     if (!V)
       return 0;
 
-    // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
-    // but do not always have a corresponding SDNode built.  The SDNodeOrder
-    // absolute, but not relative, values are different depending on whether
-    // debug info exists.
-    ++SDNodeOrder;
     SDDbgValue *SDV;
     if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
       SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
@@ -4666,7 +4789,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::eh_return_i32:
   case Intrinsic::eh_return_i64:
     DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
-    DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+    DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
                             MVT::Other,
                             getControlRoot(),
                             getValue(I.getArgOperand(0)),
@@ -4676,17 +4799,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
     return 0;
   case Intrinsic::eh_dwarf_cfa: {
-    SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
-                                        TLI.getPointerTy());
-    SDValue Offset = DAG.getNode(ISD::ADD, dl,
-                                 TLI.getPointerTy(),
-                                 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
-                                             TLI.getPointerTy()),
+    SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
+                                        TLI->getPointerTy());
+    SDValue Offset = DAG.getNode(ISD::ADD, sdl,
+                                 CfaArg.getValueType(),
+                                 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl,
+                                             CfaArg.getValueType()),
                                  CfaArg);
-    SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
-                             TLI.getPointerTy(),
-                             DAG.getConstant(0, TLI.getPointerTy()));
-    setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+    SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl,
+                             TLI->getPointerTy(),
+                             DAG.getConstant(0, TLI->getPointerTy()));
+    setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
                              FA, Offset));
     return 0;
   }
@@ -4712,7 +4835,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDValue Ops[2];
     Ops[0] = getRoot();
     Ops[1] = getValue(I.getArgOperand(0));
-    SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl,
+    SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
                              DAG.getVTList(MVT::i32, MVT::Other),
                              Ops, 2);
     setValue(&I, Op.getValue(0));
@@ -4720,7 +4843,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return 0;
   }
   case Intrinsic::eh_sjlj_longjmp: {
-    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
                             getRoot(), getValue(I.getArgOperand(0))));
     return 0;
   }
@@ -4775,10 +4898,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDValue ShOps[2];
     ShOps[0] = ShAmt;
     ShOps[1] = DAG.getConstant(0, MVT::i32);
-    ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
-    EVT DestVT = TLI.getValueType(I.getType());
-    ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
-    Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+    ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, &ShOps[0], 2);
+    EVT DestVT = TLI->getValueType(I.getType());
+    ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
+    Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
                       DAG.getConstant(NewIntrinsic, MVT::i32),
                       getValue(I.getArgOperand(0)), ShAmt);
     setValue(&I, Res);
@@ -4788,14 +4911,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::x86_avx_vinsertf128_ps_256:
   case Intrinsic::x86_avx_vinsertf128_si_256:
   case Intrinsic::x86_avx2_vinserti128: {
-    EVT DestVT = TLI.getValueType(I.getType());
-    EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
+    EVT DestVT = TLI->getValueType(I.getType());
+    EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType());
     uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
                    ElVT.getVectorNumElements();
-    Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
+    Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
                       getValue(I.getArgOperand(0)),
                       getValue(I.getArgOperand(1)),
-                      DAG.getIntPtrConstant(Idx));
+                      DAG.getConstant(Idx, TLI->getVectorIdxTy()));
     setValue(&I, Res);
     return 0;
   }
@@ -4803,12 +4926,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::x86_avx_vextractf128_ps_256:
   case Intrinsic::x86_avx_vextractf128_si_256:
  case Intrinsic::x86_avx2_vextracti128: {
-    EVT DestVT = TLI.getValueType(I.getType());
+    EVT DestVT = TLI->getValueType(I.getType());
     uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
                    DestVT.getVectorNumElements();
-    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
+    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT,
                       getValue(I.getArgOperand(0)),
-                      DAG.getIntPtrConstant(Idx));
+                      DAG.getConstant(Idx, TLI->getVectorIdxTy()));
     setValue(&I, Res);
     return 0;
   }
@@ -4834,9 +4957,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     case Intrinsic::convertus:  Code = ISD::CVT_US; break;
     case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
     }
-    EVT DestVT = TLI.getValueType(I.getType());
+    EVT DestVT = TLI->getValueType(I.getType());
     const Value *Op1 = I.getArgOperand(0);
-    Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1),
+    Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1),
                                DAG.getValueType(DestVT),
                                DAG.getValueType(getValue(Op1).getValueType()),
                                getValue(I.getArgOperand(1)),
@@ -4846,27 +4969,27 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return 0;
   }
   case Intrinsic::powi:
-    setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
+    setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
                             getValue(I.getArgOperand(1)), DAG));
     return 0;
   case Intrinsic::log:
-    setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+    setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
     return 0;
   case Intrinsic::log2:
-    setValue(&I, expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+    setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
     return 0;
   case Intrinsic::log10:
-    setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+    setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
     return 0;
   case Intrinsic::exp:
-    setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+    setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
    return 0;
   case Intrinsic::exp2:
-    setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
+    setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
     return 0;
   case Intrinsic::pow:
-    setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)),
-                           getValue(I.getArgOperand(1)), DAG, TLI));
+    setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
+                           getValue(I.getArgOperand(1)), DAG, *TLI));
     return 0;
   case Intrinsic::sqrt:
   case Intrinsic::fabs:
@@ -4876,7 +4999,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::ceil:
   case Intrinsic::trunc:
   case Intrinsic::rint:
-  case Intrinsic::nearbyint: {
+  case Intrinsic::nearbyint:
+  case Intrinsic::round: {
     unsigned Opcode;
     switch (Intrinsic) {
     default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
@@ -4889,35 +5013,42 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     case Intrinsic::trunc:     Opcode = ISD::FTRUNC;     break;
     case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
     case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+    case Intrinsic::round:     Opcode = ISD::FROUND;     break;
     }
 
-    setValue(&I, DAG.getNode(Opcode, dl,
+    setValue(&I, DAG.getNode(Opcode, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0))));
     return 0;
   }
+  case Intrinsic::copysign:
+    setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1))));
+    return 0;
   case Intrinsic::fma:
-    setValue(&I, DAG.getNode(ISD::FMA, dl,
+    setValue(&I, DAG.getNode(ISD::FMA, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1)),
                              getValue(I.getArgOperand(2))));
     return 0;
   case Intrinsic::fmuladd: {
-    EVT VT = TLI.getValueType(I.getType());
+    EVT VT = TLI->getValueType(I.getType());
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
-        TLI.isFMAFasterThanMulAndAdd(VT)){
-      setValue(&I, DAG.getNode(ISD::FMA, dl,
+        TLI->isFMAFasterThanFMulAndFAdd(VT)) {
+      setValue(&I, DAG.getNode(ISD::FMA, sdl,
                                getValue(I.getArgOperand(0)).getValueType(),
                                getValue(I.getArgOperand(0)),
                                getValue(I.getArgOperand(1)),
                                getValue(I.getArgOperand(2))));
     } else {
-      SDValue Mul = DAG.getNode(ISD::FMUL, dl,
+      SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
                                 getValue(I.getArgOperand(0)).getValueType(),
                                 getValue(I.getArgOperand(0)),
                                 getValue(I.getArgOperand(1)));
-      SDValue Add = DAG.getNode(ISD::FADD, dl,
+      SDValue Add = DAG.getNode(ISD::FADD, sdl,
                                 getValue(I.getArgOperand(0)).getValueType(),
                                 Mul,
                                 getValue(I.getArgOperand(2)));
@@ -4926,21 +5057,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return 0;
   }
   case Intrinsic::convert_to_fp16:
-    setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
+    setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl,
                              MVT::i16, getValue(I.getArgOperand(0))));
     return 0;
   case Intrinsic::convert_from_fp16:
-    setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
+    setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl,
                              MVT::f32, getValue(I.getArgOperand(0))));
     return 0;
   case Intrinsic::pcmarker: {
     SDValue Tmp = getValue(I.getArgOperand(0));
-    DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
+    DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
     return 0;
   }
   case Intrinsic::readcyclecounter: {
     SDValue Op = getRoot();
-    Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
+    Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
                       DAG.getVTList(MVT::i64, MVT::Other),
                       &Op, 1);
     setValue(&I, Res);
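The fmuladd handling above now consults isFMAFasterThanFMulAndFAdd (note the rename from isFMAFasterThanMulAndAdd) and emits a fused FMA node only when fusion is permitted; otherwise it falls back to a separate multiply and add. The decision in miniature, with std::fma standing in for the ISD::FMA node (illustrative sketch):

    #include <cmath>

    // Contract a*b+c into one fused operation only when allowed and
    // profitable, mirroring the AllowFPOpFusion check in the hunk above.
    double lowerFMulAdd(double A, double B, double C,
                        bool AllowContract, bool FMAIsFaster) {
      if (AllowContract && FMAIsFaster)
        return std::fma(A, B, C);   // single rounding
      return A * B + C;             // two roundings
    }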
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::bswap: - setValue(&I, DAG.getNode(ISD::BSWAP, dl, + setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return 0; @@ -4957,7 +5088,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, - dl, Ty, Arg)); + sdl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { @@ -4965,33 +5096,33 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, - dl, Ty, Arg)); + sdl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); + setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); return 0; } case Intrinsic::stacksave: { SDValue Op = getRoot(); - Res = DAG.getNode(ISD::STACKSAVE, dl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); + Res = DAG.getNode(ISD::STACKSAVE, sdl, + DAG.getVTList(TLI->getPointerTy(), MVT::Other), &Op, 1); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return 0; } case Intrinsic::stackrestore: { Res = getValue(I.getArgOperand(0)); - DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return 0; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI->getPointerTy(); SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); @@ -5002,7 +5133,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. 
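/* Aside (hedged, not part of the patch): the bit-manipulation intrinsics
   lowered above map one-to-one onto Clang/GCC builtins, so the node types
   chosen here are easy to observe from C++: */

#include <cstdio>

int main() {
  unsigned v = 0x01020304u;
  std::printf("%08x\n", __builtin_bswap32(v)); // 04030201 -> ISD::BSWAP
  std::printf("%d\n", __builtin_popcount(v));  // 5        -> ISD::CTPOP
  // Clang emits __builtin_ctz as llvm.cttz(v, i1 true), i.e. the
  // ISD::CTTZ_ZERO_UNDEF form selected above when the flag operand is set.
  std::printf("%d\n", __builtin_ctz(v));       // 2
  return 0;
}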
- Res = DAG.getStore(getRoot(), dl, Src, FIN, + Res = DAG.getStore(getRoot(), sdl, Src, FIN, MachinePointerInfo::getFixedStack(FI), true, false, 0); setValue(&I, Res); @@ -5046,14 +5177,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); - Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6); + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops, 6); DAG.setRoot(Res); return 0; } case Intrinsic::adjust_trampoline: { - setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl, - TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, + TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; } @@ -5070,7 +5201,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: - setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); return 0; case Intrinsic::expect: { @@ -5083,9 +5214,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trap: { StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { - ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? + ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? ISD::TRAP : ISD::DEBUGTRAP; - DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot())); + DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); return 0; } TargetLowering::ArgListTy Args; @@ -5094,9 +5225,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), - Args, DAG, dl); - std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); + DAG.getExternalSymbol(TrapFuncName.data(), + TLI->getPointerTy()), + Args, DAG, sdl); + std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); DAG.setRoot(Result.second); return 0; } @@ -5121,7 +5253,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2)); + setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); return 0; } case Intrinsic::prefetch: { @@ -5132,7 +5264,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); - DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, + DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), &Ops[0], 5, EVT::getIntegerVT(*Context, 8), @@ -5153,8 +5285,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SmallVector<Value *, 4> Allocas; GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); - for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(), - E = Allocas.end(); Object != E; ++Object) { + for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), + E = Allocas.end(); Object != E; ++Object) { AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); // Could not find an Alloca. 
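/* Aside (hedged): llvm.prefetch, lowered above as a five-operand memory
   intrinsic node (chain, address, read/write, locality, cache type), is
   reachable from C++ through __builtin_prefetch: */

#include <cstdio>

int main() {
  static int table[1024];
  __builtin_prefetch(&table[512], /*rw=*/0, /*locality=*/3); // ISD::PREFETCH
  std::printf("%d\n", table[512]); // 0; a prefetch has no visible effect
  return 0;
}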
@@ -5165,24 +5297,45 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); - Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); + Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops, 2); DAG.setRoot(Res); } return 0; } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); return 0; case Intrinsic::invariant_end: // Discard region information. return 0; + case Intrinsic::stackprotectorcheck: { + // Do not actually emit anything for this basic block. Instead we initialize + // the stack protector descriptor and export the guard variable so we can + // access it in FinishBasicBlock. + const BasicBlock *BB = I.getParent(); + SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); + ExportFromCurrentBlock(SPDescriptor.getGuard()); + + // Flush our exports since we are going to process a terminator. + (void)getControlRoot(); + return 0; + } case Intrinsic::donothing: // ignore return 0; + case Intrinsic::experimental_stackmap: { + visitStackmap(I); + return 0; + } + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: { + visitPatchpoint(I); + return 0; + } } } @@ -5201,26 +5354,27 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI); + const TargetLowering *TLI = TM.getTargetLowering(); + GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); - bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); + bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; if (!CanLowerReturn) { - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize( + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( FTy->getReturnType()); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment( + unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; @@ -5246,15 +5400,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); - unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); - Entry.isReturned = CS.paramHasAttr(attrInd, 
Attribute::Returned); - Entry.Alignment = CS.getParamAlignment(attrInd); + // Skip the first return-type Attribute to get to params. + Entry.setAttributes(&CS, i - CS.arg_begin() + 1); Args.push_back(Entry); } @@ -5277,18 +5424,18 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Both PendingLoads and PendingExports must be flushed here; // this call might not return. (void)getRoot(); - DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel)); + DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); } // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within TLI.LowerCallTo. - if (isTailCall && !isInTailCallPosition(CS, TLI)) + // Target-dependent constraints are checked within TLI->LowerCallTo. + if (isTailCall && !isInTailCallPosition(CS, *TLI)) isTailCall = false; TargetLowering:: CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, - getCurDebugLoc(), CS); - std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI); + getCurSDLoc(), CS); + std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && @@ -5301,59 +5448,57 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SmallVector<EVT, 1> PVTs; Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - ComputeValueVTs(TLI, PtrRetTy, PVTs); + ComputeValueVTs(*TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; SmallVector<EVT, 4> RetTys; SmallVector<uint64_t, 4> Offsets; RetTy = FTy->getReturnType(); - ComputeValueVTs(TLI, RetTy, RetTys, &Offsets); + ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); unsigned NumValues = RetTys.size(); SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(NumValues); for (unsigned i = 0; i < NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, + SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add, + SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], NumValues); PendingLoads.push_back(Chain); setValue(CS.getInstruction(), - DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&RetTys[0], RetTys.size()), &Values[0], Values.size())); } - // Assign order to nodes here. If the call does not produce a result, it won't - // be mapped to a SDNode and visit() will not assign it an order number. if (!Result.second.getNode()) { - // As a special case, a null chain means that a tail call has been emitted and - // the DAG root is already updated. + // As a special case, a null chain means that a tail call has been emitted + // and the DAG root is already updated. HasTailCall = true; - ++SDNodeOrder; - AssignOrderingToNode(DAG.getRoot().getNode()); + + // Since there's no actual continuation from this block, nothing can be + // relying on us setting vregs for them. 
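/* Sketch of the sret demotion arranged above when CanLowerReturn is false
   (function names are invented for illustration): a return value too wide
   for registers is returned through a hidden pointer to a caller-created
   stack slot, which is what DemoteStackSlot models. */

struct Big { long v[8]; };

static void make_big_demoted(Big *demote_slot) { // lowered form
  for (int i = 0; i != 8; ++i)
    demote_slot->v[i] = i;
}

static Big make_big() {                          // source-level form
  Big b;
  make_big_demoted(&b);                          // store through the slot
  return b;
}

int main() { return make_big().v[3] == 3 ? 0 : 1; } // exits 0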
+ PendingExports.clear(); } else { DAG.setRoot(Result.second); - ++SDNodeOrder; - AssignOrderingToNode(Result.second.getNode()); } if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); - DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel)); + DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. MMI.addInvoke(LandingPad, BeginLabel, EndLabel); @@ -5408,10 +5553,10 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, } SDValue Ptr = Builder.getValue(PtrVal); - SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, + SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, - false /*nontemporal*/, + false /*nontemporal*/, false /*isinvariant*/, 1 /* align=1 */); if (!ConstantMemory) @@ -5419,6 +5564,18 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, return LoadVal; } +/// processIntegerCallValue - Record the value for an instruction that +/// produces an integer result, converting the type where necessary. +void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, + SDValue Value, + bool IsSigned) { + EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true); + if (IsSigned) + Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); + else + Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); + setValue(&I, Value); +} /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be @@ -5434,15 +5591,33 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { !I.getType()->isIntegerTy()) return false; - const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2)); + const Value *Size = I.getArgOperand(2); + const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); + if (CSize && CSize->getZExtValue() == 0) { + EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true); + setValue(&I, DAG.getConstant(0, CallVT)); + return true; + } + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(LHS), getValue(RHS), getValue(Size), + MachinePointerInfo(LHS), + MachinePointerInfo(RHS)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { + if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { bool ActuallyDoIt = true; MVT LoadVT; Type *LoadTy; - switch (Size->getZExtValue()) { + switch (CSize->getZExtValue()) { default: LoadVT = MVT::Other; LoadTy = 0; @@ -5450,20 +5625,20 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { break; case 2: LoadVT = MVT::i16; - LoadTy = Type::getInt16Ty(Size->getContext()); + LoadTy = Type::getInt16Ty(CSize->getContext()); break; case 4: LoadVT = MVT::i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); break; case 8: LoadVT = MVT::i64; - LoadTy = Type::getInt64Ty(Size->getContext()); + LoadTy = Type::getInt64Ty(CSize->getContext()); break; /* case 
16: LoadVT = MVT::v4i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); LoadTy = VectorType::get(LoadTy, 4); break; */ @@ -5476,10 +5651,11 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Require that we can find a legal MVT, and only do this if the target // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. - if (ActuallyDoIt && Size->getZExtValue() > 4) { + const TargetLowering *TLI = TM.getTargetLowering(); + if (ActuallyDoIt && CSize->getZExtValue() > 4) { // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT)) + if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) ActuallyDoIt = false; } @@ -5487,10 +5663,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); - SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal, + SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); - EVT CallVT = TLI.getValueType(I.getType(), true); - setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT)); + processIntegerCallValue(I, Res, false); return true; } } @@ -5499,6 +5674,148 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitMemChrCall -- See if we can lower a memchr call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { + // Verify that the prototype makes sense. void *memchr(void *, int, size_t) + if (I.getNumArgOperands() != 3) + return false; + + const Value *Src = I.getArgOperand(0); + const Value *Char = I.getArgOperand(1); + const Value *Length = I.getArgOperand(2); + if (!Src->getType()->isPointerTy() || + !Char->getType()->isIntegerTy() || + !Length->getType()->isIntegerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Src), getValue(Char), getValue(Length), + MachinePointerInfo(Src)); + if (Res.first.getNode()) { + setValue(&I, Res.first); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an +/// optimized form. If so, return true and lower it, otherwise return false +/// and it will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { + // Verify that the prototype makes sense. 
char *strcpy(char *, char *) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1), isStpcpy); + if (Res.first.getNode()) { + setValue(&I, Res.first); + DAG.setRoot(Res.second); + return true; + } + + return false; +} + +/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. +/// If so, return true and lower it, otherwise return false and it will be +/// lowered like a normal call. +bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { + // Verify that the prototype makes sense. int strcmp(void*,void*) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrLenCall -- See if we can lower a strlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { + // Verify that the prototype makes sense. size_t strlen(char *) + if (I.getNumArgOperands() != 1) + return false; + + const Value *Arg0 = I.getArgOperand(0); + if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { + // Verify that the prototype makes sense. 
size_t strnlen(char *, size_t) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + /// visitUnaryFloatCall - If a call instruction is a unary floating-point /// operation (as expected), translate it to an SDNode with the specified opcode /// and return true. @@ -5512,7 +5829,7 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, return false; SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp)); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); return true; } @@ -5561,7 +5878,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); - setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), LHS.getValueType(), LHS, RHS)); return; } @@ -5587,6 +5904,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl: + case LibFunc::sqrt_finite: + case LibFunc::sqrtf_finite: + case LibFunc::sqrtl_finite: if (visitUnaryFloatCall(I, ISD::FSQRT)) return; break; @@ -5614,6 +5934,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + if (visitUnaryFloatCall(I, ISD::FROUND)) + return; + break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: @@ -5636,6 +5962,30 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitMemCmpCall(I)) return; break; + case LibFunc::memchr: + if (visitMemChrCall(I)) + return; + break; + case LibFunc::strcpy: + if (visitStrCpyCall(I, false)) + return; + break; + case LibFunc::stpcpy: + if (visitStrCpyCall(I, true)) + return; + break; + case LibFunc::strcmp: + if (visitStrCmpCall(I)) + return; + break; + case LibFunc::strlen: + if (visitStrLenCall(I)) + return; + break; + case LibFunc::strnlen: + if (visitStrNLenCall(I)) + return; + break; } } } @@ -5644,7 +5994,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!RenameFn) Callee = getValue(I.getCalledValue()); else - Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); + Callee = DAG.getExternalSymbol(RenameFn, + TM.getTargetLowering()->getPointerTy()); // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. 
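/* Aside (hedged): the memcmp fast path handled above is observable from
   C++.  For a constant size of 2, 4 or 8 bytes whose result is only
   compared against zero, visitMemCmpCall emits two wide loads and a single
   SETNE instead of a library call: */

#include <cstdio>
#include <cstring>

int main() {
  char a[4] = {'a', 'b', 'c', 0};
  char b[4] = {'a', 'b', 'd', 0};
  // With optimization enabled this compiles to two 32-bit loads and one
  // compare; the observable result is unchanged.
  std::printf("%d\n", std::memcmp(a, b, 4) != 0); // 1
  return 0;
}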
@@ -5733,7 +6084,7 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; /// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, - DebugLoc DL, + SDLoc DL, SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); @@ -5839,8 +6190,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; + const TargetLowering *TLI = TM.getTargetLowering(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI.ParseConstraints(CS); + TargetConstraints = TLI->ParseConstraints(CS); bool hasMemory = false; @@ -5865,10 +6217,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); + OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI.getSimpleValueType(CS.getType()); + OpVT = TLI->getSimpleValueType(CS.getType()); } ++ResNo; break; @@ -5889,7 +6241,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD). + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD). getSimpleVT(); } @@ -5901,7 +6253,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { TargetLowering::ConstraintType - CType = TLI.getConstraintType(OpInfo.Codes[j]); + CType = TLI->getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -5933,11 +6285,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintVT != Input.ConstraintVT) { std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); + TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + TLI->getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -5950,7 +6302,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.Type == InlineAsm::isClobber) @@ -5978,17 +6330,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), - TLI.getPointerTy()); + TLI->getPointerTy()); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. 
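/* x86-only illustration (hedged): an operand bound to a memory ("m")
   constraint must live in memory; the code below spills an operand that is
   not already in memory to a fresh stack slot before the asm executes. */

#include <cstdio>

int main() {
  int x = 20, y;
  __asm__("movl %1, %0" : "=r"(y) : "m"(x)); // %1 is a memory operand
  std::printf("%d\n", y + 22); // 42
  return 0;
}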
Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); - Chain = DAG.getStore(Chain, getCurDebugLoc(), + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy()); + Chain = DAG.getStore(Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(SSFI), false, false, 0); @@ -6005,7 +6357,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); + GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -6016,7 +6368,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); + GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6024,7 +6376,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back( DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), - TLI.getPointerTy())); + TLI->getPointerTy())); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we @@ -6047,7 +6399,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); + TLI->ComputeConstraintToUse(OpInfo, SDValue()); // Ideally, we would only check against memory constraints. However, the // meaning of an other constraint can be target-specific and we can't easily @@ -6065,7 +6417,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, - TLI.getPointerTy())); + TLI->getPointerTy())); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6087,7 +6439,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6098,10 +6450,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // we can use. 
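/* x86-only illustration (hedged): "a" and "d" name specific registers
   (EAX/EDX), so they are C_Register constraints allocated in the first
   pass above, while plain "r" operands receive virtual registers in the
   second pass: */

#include <cstdio>

int main() {
  unsigned lo, hi;
  __asm__("movl $1, %0\n\tmovl $2, %1" : "=a"(lo), "=d"(hi));
  std::printf("%u %u\n", lo, hi); // 1 2
  return 0;
}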
if (OpInfo.AssignedRegs.Regs.empty()) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), + Ctx.emitError(CS.getInstruction(), "couldn't allocate output register for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } // If this is an indirect operand, store through the pointer after the @@ -6118,13 +6470,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know that this register is // set. - OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? - InlineAsm::Kind_RegDefEarlyClobber : - InlineAsm::Kind_RegDef, - false, - 0, - DAG, - AsmNodeOperands); + OpInfo.AssignedRegs + .AddInlineAsmOperands(OpInfo.isEarlyClobber + ? InlineAsm::Kind_RegDefEarlyClobber + : InlineAsm::Kind_RegDef, + false, 0, DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { @@ -6156,10 +6506,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" - " don't know how to handle tied " - "indirect register inputs"); - report_fatal_error("Cannot handle indirect register inputs!"); + Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); + return; } RegsForValue MatchedRegs; @@ -6169,18 +6519,18 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); i != e; ++i) { - if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT)) MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); else { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm error: This value" + Ctx.emitError(CS.getInstruction(), + "inline asm error: This value" " type register class is not natively supported!"); - report_fatal_error("inline asm error: This value type register " - "class is not natively supported!"); + return; } } // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), @@ -6196,7 +6546,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -6208,34 +6558,34 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; - TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, - Ops, DAG); + TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, + Ops, DAG); if (Ops.empty()) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), "invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } // Add information to the INLINEASM node to know 
about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - assert(InOperandVal.getValueType() == TLI.getPointerTy() && + assert(InOperandVal.getValueType() == TLI->getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6249,20 +6599,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), "Don't know how to handle indirect register inputs yet " - "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); - break; + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), + Ctx.emitError(CS.getInstruction(), "couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } - OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, @@ -6285,7 +6636,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); - Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), + Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), DAG.getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], AsmNodeOperands.size()); Flag = Chain.getValue(1); @@ -6293,12 +6644,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this asm returns a register value, copy the result from that register // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { - SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), + SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI.getValueType(CS.getType()); + EVT ResultType = TLI->getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -6306,7 +6657,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // not have the same VT as was expected. Convert it to the right type // with bit_convert. 
if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { - Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultType, Val); } else if (ResultType != Val.getValueType() && @@ -6314,7 +6665,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If a result value was tied to an input value, the computed result may // have a wider width than the expected result. Extract the relevant // portion. - Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val); + Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val); } assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); @@ -6333,7 +6684,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; - SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, IA); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -6341,7 +6692,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Emit the non-flagged stores from the physregs. SmallVector<SDValue, 8> OutChains; for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { - SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), + SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), MachinePointerInfo(StoresToEmit[i].second), @@ -6350,22 +6701,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } if (!OutChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &OutChains[0], OutChains.size()); DAG.setRoot(Chain); } void SelectionDAGBuilder::visitVAStart(const CallInst &I) { - DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { - const DataLayout &TD = *TLI.getDataLayout(); - SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), + const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout &TD = *TLI->getDataLayout(); + SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), TD.getABITypeAlignment(I.getType())); @@ -6374,14 +6726,14 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { } void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { - DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVACopy(const CallInst &I) { - DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), @@ -6389,6 +6741,248 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.getSrcValue(I.getArgOperand(1)))); } +/// \brief Lower an argument list according to the target calling convention. 
+/// +/// \return A tuple of <return-value, token-chain> +/// +/// This is a helper for lowering intrinsics that follow a target calling +/// convention or require stack pointer adjustment. Only a subset of the +/// intrinsic's operands need to participate in the calling convention. +std::pair<SDValue, SDValue> +SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, + unsigned NumArgs, SDValue Callee, + bool useVoidTy) { + TargetLowering::ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. + ImmutableCallSite CS(&CI); + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + ArgI != ArgE; ++ArgI) { + const Value *V = CI.getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + TargetLowering::ArgListEntry Entry; + Entry.Node = getValue(V); + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, AttrI); + Args.push_back(Entry); + } + + Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); + TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false, + /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs, + CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false, + /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc()); + + const TargetLowering *TLI = TM.getTargetLowering(); + return TLI->LowerCallTo(CLI); +} + +/// \brief Lower llvm.experimental.stackmap directly to its target opcode. +void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { + // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, + // [live variables...]) + + assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); + + SDValue Callee = getValue(CI.getCalledValue()); + + // Lower into a call sequence with no args and no return value. + std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee); + // Set the root to the target-lowered call chain. + SDValue Chain = Result.second; + DAG.setRoot(Chain); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + SDNode *CallEnd = Chain.getNode(); + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool hasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the stackmap intrinsic. + SmallVector<SDValue, 8> Ops; + + // Add the <id> and <numShadowBytes> constants. + for (unsigned i = 0; i < 2; ++i) { + SDValue tmp = getValue(CI.getOperand(i)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); + } + // Push live variables for the stack map. + for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i) + Ops.push_back(getValue(CI.getArgOperand(i))); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). + if (hasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a STACKMAP node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(), + NodeTys, Ops); + + // StackMap generates no value, so nothing goes in the NodeMap. + + // Fixup the consumers of the intrinsic. 
The chain and glue may be used in the + // call sequence. + DAG.ReplaceAllUsesWith(Call, MN); + + DAG.DeleteNode(Call); +} + +/// \brief Lower llvm.experimental.patchpoint directly to its target opcode. +void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { + // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>, + // i32 <numBytes>, + // i8* <target>, + // i32 <numArgs>, + // [Args...], + // [live variables...]) + + CallingConv::ID CC = CI.getCallingConv(); + bool isAnyRegCC = CC == CallingConv::AnyReg; + bool hasDef = !CI.getType()->isVoidTy(); + SDValue Callee = getValue(CI.getOperand(2)); // <target> + + // Get the real number of arguments participating in the call <numArgs> + unsigned NumArgs = + cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue(); + + // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> + assert(CI.getNumArgOperands() >= NumArgs + 4 && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; + std::pair<SDValue, SDValue> Result = + LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + + // Set the root to the target-lowered call chain. + SDValue Chain = Result.second; + DAG.setRoot(Chain); + + SDNode *CallEnd = Chain.getNode(); + if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) + CallEnd = CallEnd->getOperand(0).getNode(); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool hasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the patchable intrinsic. + SmallVector<SDValue, 8> Ops; + + // Add the <id> and <numNopBytes> constants. + for (unsigned i = 0; i < 2; ++i) { + SDValue tmp = getValue(CI.getOperand(i)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); + } + // Assume that the Callee is a constant address. + Ops.push_back( + DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), + /*isTarget=*/true)); + + // Adjust <numArgs> to account for any arguments that have been passed on the + // stack instead. + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3); + NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs; + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + + // Add the calling convention + Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (isAnyRegCC) + for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + Ops.push_back(getValue(CI.getArgOperand(i))); + + // Push the arguments from the call instruction. + SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; + for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) + Ops.push_back(*i); + + // Push live variables for the stack map. 
+ for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) { + SDValue OpVal = getValue(CI.getArgOperand(i)); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { + Ops.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else + Ops.push_back(OpVal); + } + + // Push the register mask info. + if (hasGlue) + Ops.push_back(*(Call->op_end()-2)); + else + Ops.push_back(*(Call->op_end()-1)); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). + if (hasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys; + if (isAnyRegCC && hasDef) { + // Create the return types based on the intrinsic definition + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 3> ValueVTs; + ComputeValueVTs(TLI, CI.getType(), ValueVTs); + assert(ValueVTs.size() == 1 && "Expected only one return value type."); + + // There is always a chain and a glue type at the end + ValueVTs.push_back(MVT::Other); + ValueVTs.push_back(MVT::Glue); + NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + } else + NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a PATCHPOINT node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, + getCurSDLoc(), NodeTys, Ops); + + // Update the NodeMap. + if (hasDef) { + if (isAnyRegCC) + setValue(&CI, SDValue(MN, 0)); + else + setValue(&CI, Result.first); + } + + // Fixup the consumers of the intrinsic. The chain and glue may be used in the + // call sequence. Furthermore the location of the chain and glue can change + // when the AnyReg calling convention is used and the intrinsic returns a + // value. + if (isAnyRegCC && hasDef) { + SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; + SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + } else + DAG.ReplaceAllUsesWith(Call, MN); + DAG.DeleteNode(Call); +} + /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. 
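/* Sketch of the part splitting performed in LowerCallTo below: on a target
   whose widest legal integer register is 32 bits, a single i64 value is
   decomposed into NumRegs == 2 parts of RegisterVT i32 (the ArgVT field
   added below preserves the original, pre-legalization type): */

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t v = 0x0123456789abcdefULL;
  uint32_t parts[2] = {
    static_cast<uint32_t>(v),       // low part
    static_cast<uint32_t>(v >> 32)  // high part
  };
  std::printf("%08x %08x\n", parts[0], parts[1]); // 89abcdef 01234567
  return 0;
}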
/// FIXME: When all targets are @@ -6406,6 +7000,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; MyFlags.Used = CLI.IsReturnValueUsed; if (CLI.RetSExt) MyFlags.Flags.setSExt(); @@ -6495,7 +7090,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, i < CLI.NumFixedArgs, i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) @@ -6588,9 +7183,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); - RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V); + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, 0, V); PendingExports.push_back(Chain); } @@ -6617,21 +7213,23 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; - DebugLoc dl = SDB->getCurDebugLoc(); - const DataLayout *TD = TLI.getDataLayout(); + SDLoc dl = SDB->getCurSDLoc(); + const TargetLowering *TLI = getTargetLowering(); + const DataLayout *TD = TLI->getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*getTargetLowering(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. 
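/* Hedged example for the argument-flag machinery below: an aggregate
   parameter.  On ABIs that pass it byval, the caller makes the copy and
   the ByValAlign flag recorded below carries that copy's alignment. */

struct S { int a[4]; };

static int sum_fields(S s) {                // may be passed byval
  return s.a[0] + s.a[1] + s.a[2] + s.a[3];
}

int main() {
  S s = {{1, 2, 3, 4}};
  return sum_fields(s) == 10 ? 0 : 1;       // exits 0
}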
ISD::ArgFlagsTy Flags; Flags.setSRet(); - MVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); - ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0); + MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); + ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0); Ins.push_back(RetArg); } @@ -6640,8 +7238,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); + unsigned PartBase = 0; for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; @@ -6669,18 +7268,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); else - FrameAlign = TLI.getByValTypeAlignment(ElementTy); + FrameAlign = TLI->getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - MVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); - unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); + MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); + unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed, - Idx-1, i*RegisterVT.getStoreSize()); + ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, + Idx-1, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -6688,14 +7287,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setOrigAlign(1); Ins.push_back(MyFlags); } + PartBase += VT.getStoreSize(); } } // Call the target to set up the argument values. SmallVector<SDValue, 8> InVals; - SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), - F.isVarArg(), Ins, - dl, DAG, InVals); + SDValue NewRoot = TLI->LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), + F.isVarArg(), Ins, + dl, DAG, InVals); // Verify that the target's LowerFormalArguments behaved as expected. assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && @@ -6721,18 +7321,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. 
SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); - MVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, NULL, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); - unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); + unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; - NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), + NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); @@ -6745,18 +7345,24 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ++I, ++Idx) { SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); // If this argument is unused then remember its value. It is used to generate // debugging information. - if (I->use_empty() && NumValues) + if (I->use_empty() && NumValues) { SDB->setUnusedArgValue(I, InVals[i]); + // Also remember any frame index for use in FastISel. + if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(InVals[i].getNode())) + FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + } + for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - MVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); - unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); + MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); + unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); if (!I->use_empty()) { ISD::NodeType AssertOp = ISD::DELETED_NODE; @@ -6783,11 +7389,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, - SDB->getCurDebugLoc()); + SDB->getCurSDLoc()); SDB->setValue(I, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { - if (LoadSDNode *LNode = + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) @@ -6885,15 +7491,36 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, PN->getType(), ValueVTs); + const TargetLowering *TLI = TM.getTargetLowering(); + ComputeValueVTs(*TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); + unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; } } } + ConstantsOut.clear(); } + +/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB +/// is 0. 
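/* Hedged pseudo-C++ for the control flow AddSuccessorMBB (defined below)
   is used to build: the parent block compares the saved guard with its
   current value and branches either to a success block (the original tail)
   or to a failure block that calls the runtime handler.  __stack_chk_fail
   is the real libc entry point; the helper name is invented for
   illustration. */

extern "C" void __stack_chk_fail();

inline void stack_protector_check(long stored_guard, long current_guard) {
  if (stored_guard != current_guard) // failure MBB
    __stack_chk_fail();              // does not return
  // success MBB: fall through to the function epilogue
}

int main() {
  stack_protector_check(42, 42); // guards match: no failure path taken
  return 0;
}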
+MachineBasicBlock * +SelectionDAGBuilder::StackProtectorDescriptor:: +AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + MachineBasicBlock *SuccMBB) { + // If SuccBB has not been created yet, create it. + if (!SuccMBB) { + MachineFunction *MF = ParentMBB->getParent(); + MachineFunction::iterator BBI = ParentMBB; + SuccMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(++BBI, SuccMBB); + } + // Add it as a successor of ParentMBB. + ParentMBB->addSuccessor(SuccMBB); + return SuccMBB; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9188945..835f643 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===// +//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -26,6 +26,7 @@ namespace llvm { +class AddrSpaceCastInst; class AliasAnalysis; class AllocaInst; class BasicBlock; @@ -80,11 +81,11 @@ class ZExtInst; /// implementation that is parameterized by a TargetLowering object. /// class SelectionDAGBuilder { - /// CurDebugLoc - current file + line number. Changes as we build the DAG. - DebugLoc CurDebugLoc; + /// CurInst - The current instruction being visited + const Instruction *CurInst; DenseMap<const Value*, SDValue> NodeMap; - + /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used /// to preserve debug information for incoming arguments. DenseMap<const Value*, SDValue> UnusedArgNodeMap; @@ -182,6 +183,17 @@ private: typedef std::vector<CaseRec> CaseRecVector; + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator()(const Case &C1, const Case &C2) { + assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; + struct CaseBitsCmp { bool operator()(const CaseBits &C1, const CaseBits &C2) { return C1.Bits > C2.Bits; @@ -224,7 +236,7 @@ private: struct JumpTable { JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} - + /// Reg - the virtual register containing the index of the jump table entry //. to jump to. unsigned Reg; @@ -278,12 +290,204 @@ private: BitTestInfo Cases; }; -public: - // TLI - This is information that describes the available target features we - // need for lowering. This indicates when operations are unavailable, - // implemented with a libcall, etc. + /// A class which encapsulates all of the information needed to generate a + /// stack protector check and signals to isel via its state being initialized + /// that a stack protector needs to be generated. + /// + /// *NOTE* The following is a high level documentation of SelectionDAG Stack + /// Protector Generation. The reason that it is placed here is for a lack of + /// other good places to stick it. + /// + /// High Level Overview of SelectionDAG Stack Protector Generation: + /// + /// Previously, generation of stack protectors was done exclusively in the + /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". 
This necessitated + /// splitting basic blocks at the IR level to create the success/failure basic + /// blocks in the tail of the basic block in question. As a result of this, + /// calls that would have qualified for the sibling call optimization were no + /// longer eligible for optimization since said calls were no longer right in + /// the "tail position" (i.e. the immediate predecessor of a ReturnInst + /// instruction). + /// + /// Then it was noticed that since the sibling call optimization causes the + /// callee to reuse the caller's stack, if we could delay the generation of + /// the stack protector check until later in CodeGen after the sibling call + /// decision was made, we get both the tail call optimization and the stack + /// protector check! + /// + /// A few goals in solving this problem were: + /// + /// 1. Preserve the architecture independence of stack protector generation. + /// + /// 2. Preserve the normal IR level stack protector check for platforms like + /// OpenBSD for which we support platform specific stack protector + /// generation. + /// + /// The main problem that guided the present solution is that one can not + /// solve this problem in an architecture independent manner at the IR level + /// only. This is because: + /// + /// 1. The decision on whether or not to perform a sibling call on certain + /// platforms (for instance i386) requires lower level information + /// related to available registers that can not be known at the IR level. + /// + /// 2. Even if the previous point were not true, the decision on whether to + /// perform a tail call is done in LowerCallTo in SelectionDAG which + /// occurs after the Stack Protector Pass. As a result, one would need to + /// put the relevant callinst into the stack protector check success + /// basic block (where the return inst is placed) and then move it back + /// later at SelectionDAG/MI time before the stack protector check if the + /// tail call optimization failed. The MI level option was nixed + /// immediately since it would require platform specific pattern + /// matching. The SelectionDAG level option was nixed because + /// SelectionDAG only processes one IR level basic block at a time + /// implying one could not create a DAG Combine to move the callinst. + /// + /// To get around this problem a few things were realized: + /// + /// 1. While one can not handle multiple IR level basic blocks at the + /// SelectionDAG Level, one can generate multiple machine basic blocks + /// for one IR level basic block. This is how we handle bit tests and + /// switches. + /// + /// 2. At the MI level, tail calls are represented via a special return + /// MIInst called "tcreturn". Thus if we know the basic block in which we + /// wish to insert the stack protector check, we get the correct behavior + /// by always inserting the stack protector check right before the return + /// statement. This is a "magical transformation" since no matter where + /// the stack protector check intrinsic is, we always insert the stack + /// protector check code at the end of the BB. + /// + /// Given the aforementioned constraints, the following solution was devised: + /// + /// 1. On platforms that do not support SelectionDAG stack protector check + /// generation, allow for the normal IR level stack protector check + /// generation to continue. + /// + /// 2. On platforms that do support SelectionDAG stack protector check + /// generation: + /// + /// a. 
Use the IR level stack + /// protector pass to decide if a stack + /// protector is required/which BB we insert the stack protector check + /// in by reusing the logic already therein. If we wish to generate a + /// stack protector check in a basic block, we place a special IR + /// intrinsic called llvm.stackprotectorcheck right before the BB's + /// returninst or if there is a callinst that could potentially be + /// sibling call optimized, before the call inst. + /// + /// b. Then when a BB with said intrinsic is processed, we codegen the BB + /// normally via SelectBasicBlock. In said process, when we visit the + /// stack protector check, we do not actually emit anything into the + /// BB. Instead, we just initialize the stack protector descriptor + /// class (which involves stashing information/creating the success + /// mbb and the failure mbb if we have not created one for this + /// function yet) and export the guard variable that we are going to + /// compare. + /// + /// c. After we finish selecting the basic block, in FinishBasicBlock if + /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is + /// initialized, we first find a splice point in the parent basic block + /// before the terminator and then splice the terminator of said basic + /// block into the success basic block. Then we code-gen a new tail for + /// the parent basic block consisting of the two loads, the comparison, + /// and finally two branches to the success/failure basic blocks. We + /// conclude by code-gening the failure basic block if we have not + /// code-gened it already (all stack protector checks we generate in + /// the same function use the same failure basic block). + class StackProtectorDescriptor { + public: + StackProtectorDescriptor() : ParentMBB(0), SuccessMBB(0), FailureMBB(0), + Guard(0) { } + ~StackProtectorDescriptor() { } + + /// Returns true if all fields of the stack protector descriptor are + /// initialized implying that we should/are ready to emit a stack protector. + bool shouldEmitStackProtector() const { + return ParentMBB && SuccessMBB && FailureMBB && Guard; + } + + /// Initialize the stack protector descriptor structure for a new basic + /// block. + void initialize(const BasicBlock *BB, + MachineBasicBlock *MBB, + const CallInst &StackProtCheckCall) { + // Make sure we are not initialized yet. + assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " + "already initialized!"); + ParentMBB = MBB; + SuccessMBB = AddSuccessorMBB(BB, MBB); + FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB); + if (!Guard) + Guard = StackProtCheckCall.getArgOperand(0); + } + + /// Reset state that changes when we handle different basic blocks. + /// + /// This currently includes: + /// + /// 1. The specific basic block we are generating a + /// stack protector for (ParentMBB). + /// + /// 2. The successor machine basic block that will contain the tail of + /// parent mbb after we create the stack protector check (SuccessMBB). This + /// BB is visited only on stack protector check success. + void resetPerBBState() { + ParentMBB = 0; + SuccessMBB = 0; + } + + /// Reset state that only changes when we switch functions. + /// + /// This currently includes: + /// + /// 1. FailureMBB since we reuse the failure code path for all stack + /// protector checks created in an individual function. + /// + /// 2. The guard variable since the guard variable we are checking against is + /// always the same. 
+ void resetPerFunctionState() { + FailureMBB = 0; + Guard = 0; + } + + MachineBasicBlock *getParentMBB() { return ParentMBB; } + MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } + MachineBasicBlock *getFailureMBB() { return FailureMBB; } + const Value *getGuard() { return Guard; } + + private: + /// The basic block for which we are generating the stack protector. + /// + /// As a result of stack protector generation, we will splice the + /// terminators of this basic block into the successor mbb SuccessMBB and + /// replace it with a compare/branch to the successor mbbs + /// SuccessMBB/FailureMBB depending on whether or not the stack protector + /// was violated. + MachineBasicBlock *ParentMBB; + + /// A basic block visited on stack protector check success that contains the + /// terminators of ParentMBB. + MachineBasicBlock *SuccessMBB; + + /// This basic block visited on stack protector check failure that will + /// contain a call to __stack_chk_fail(). + MachineBasicBlock *FailureMBB; + + /// The guard variable which we will compare against the stored value in the + /// stack protector stack slot. + const Value *Guard; + + /// Add a successor machine basic block to ParentMBB. If the successor mbb + /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic + /// block will be created. + MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + MachineBasicBlock *SuccMBB = 0); + }; + +private: const TargetMachine &TM; - const TargetLowering &TLI; +public: SelectionDAG &DAG; const DataLayout *TD; AliasAnalysis *AA; @@ -298,6 +502,9 @@ public: /// BitTestCases - Vector of BitTestBlock structures used to communicate /// SwitchInst code generation information. std::vector<BitTestBlock> BitTestCases; + /// A StackProtectorDescriptor structure used to communicate stack protector + /// information in between SelectBasicBlock and FinishBasicBlock. + StackProtectorDescriptor SPDescriptor; // Emit PHI-node-operand constants only once even if used by multiple // PHI nodes. @@ -308,9 +515,9 @@ public: FunctionLoweringInfo &FuncInfo; /// OptLevel - What optimization level we're generating code for. - /// + /// CodeGenOpt::Level OptLevel; - + /// GFI - Garbage collection metadata for the function. GCFunctionInfo *GFI; @@ -327,7 +534,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + : CurInst(NULL), SDNodeOrder(0), TM(dag.getTarget()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false) { } @@ -364,17 +571,18 @@ public: /// SDValue getControlRoot(); - DebugLoc getCurDebugLoc() const { return CurDebugLoc; } + SDLoc getCurSDLoc() const { + return SDLoc(CurInst, SDNodeOrder); + } + + DebugLoc getCurDebugLoc() const { + return CurInst ? CurInst->getDebugLoc() : DebugLoc(); + } unsigned getSDNodeOrder() const { return SDNodeOrder; } void CopyValueToVirtualRegister(const Value *V, unsigned Reg); - /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten - /// from how the code appeared in the source. The ordering is used by the - /// scheduler to effectively turn off scheduling. 
- void AssignOrderingToNode(const SDNode *Node); - void visit(const Instruction &I); void visit(unsigned Opcode, const User &I); @@ -391,7 +599,7 @@ public: assert(N.getNode() == 0 && "Already set a value for this node!"); N = NewN; } - + void setUnusedArgValue(const Value *V, SDValue NewN) { SDValue &N = UnusedArgNodeMap[V]; assert(N.getNode() == 0 && "Already set a value for this node!"); @@ -412,6 +620,12 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); + std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI, + unsigned ArgIdx, + unsigned NumArgs, + SDValue Callee, + bool useVoidTy = false); + /// UpdateSplitBlock - When an MBB was split during scheduling, update the /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); @@ -453,6 +667,9 @@ private: public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); + void visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB); + void visitSPDescriptorFailure(StackProtectorDescriptor &SPD); void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, @@ -463,7 +680,7 @@ public: void visitJumpTable(JumpTable &JT); void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); - + private: // These all get lowered before this pass. void visitInvoke(const InvokeInst &I); @@ -504,6 +721,7 @@ private: void visitPtrToInt(const User &I); void visitIntToPtr(const User &I); void visitBitCast(const User &I); + void visitAddrSpaceCast(const User &I); void visitExtractElement(const User &I); void visitInsertElement(const User &I); @@ -525,6 +743,11 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitMemChrCall(const CallInst &I); + bool visitStrCpyCall(const CallInst &I, bool isStpcpy); + bool visitStrCmpCall(const CallInst &I); + bool visitStrLenCall(const CallInst &I); + bool visitStrNLenCall(const CallInst &I); bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); @@ -537,6 +760,8 @@ private: void visitVAArg(const VAArgInst &I); void visitVAEnd(const CallInst &I); void visitVACopy(const CallInst &I); + void visitStackmap(const CallInst &I); + void visitPatchpoint(const CallInst &I); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); @@ -545,10 +770,13 @@ private: llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } + void processIntegerCallValue(const Instruction &I, + SDValue Value, bool IsSigned); + void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); /// EmitFuncArgumentDbgValue - If V is a function argument then create - /// corresponding DBG_VALUE machine instruction for it now. At the end of + /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. 
bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, int64_t Offset, const SDValue &N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 47b0391..c04a08d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -92,9 +92,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; - case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; - case ISD::LSDAADDR: return "LSDAADDR"; - case ISD::EHSELECTION: return "EHSELECTION"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; @@ -145,6 +142,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FROUND: return "fround"; case ISD::FEXP: return "fexp"; case ISD::FEXP2: return "fexp2"; case ISD::FLOG: return "flog"; @@ -226,6 +224,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; case ISD::BITCAST: return "bitcast"; + case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP32: return "fp16_to_fp32"; case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -487,10 +486,16 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " " << offset; if (unsigned int TF = BA->getTargetFlags()) OS << " [TF=" << TF << ']'; + } else if (const AddrSpaceCastSDNode *ASC = + dyn_cast<AddrSpaceCastSDNode>(this)) { + OS << '[' + << ASC->getSrcAddressSpace() + << " -> " + << ASC->getDestAddressSpace() + << ']'; } - if (G) - if (unsigned Order = G->GetOrdering(this)) + if (unsigned Order = getIROrder()) OS << " [ORD=" << Order << ']'; if (getNodeId() != -1) @@ -501,8 +506,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { DIScope Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); OS << " dbg:"; + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); // Omit the directory, since it's usually long and uninteresting. - if (Scope.Verify()) + if (Scope) OS << Scope.getFilename(); else OS << "<unknown>"; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9935626..3a0cfa1 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -222,23 +223,61 @@ defaultListDAGScheduler("default", "Best scheduler for the target", namespace llvm { //===--------------------------------------------------------------------===// + /// \brief This class is used by SelectionDAGISel to temporarily override + /// the optimization level on a per-function basis. 
+ class OptLevelChanger { + SelectionDAGISel &IS; + CodeGenOpt::Level SavedOptLevel; + bool SavedFastISel; + + public: + OptLevelChanger(SelectionDAGISel &ISel, + CodeGenOpt::Level NewOptLevel) : IS(ISel) { + SavedOptLevel = IS.OptLevel; + if (NewOptLevel == SavedOptLevel) + return; + IS.OptLevel = NewOptLevel; + IS.TM.setOptLevel(NewOptLevel); + SavedFastISel = IS.TM.Options.EnableFastISel; + if (NewOptLevel == CodeGenOpt::None) + IS.TM.setFastISel(true); + DEBUG(dbgs() << "\nChanging optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel + << " ; After: -O" << NewOptLevel << "\n"); + } + + ~OptLevelChanger() { + if (IS.OptLevel == SavedOptLevel) + return; + DEBUG(dbgs() << "\nRestoring optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel + << " ; After: -O" << SavedOptLevel << "\n"); + IS.OptLevel = SavedOptLevel; + IS.TM.setOptLevel(SavedOptLevel); + IS.TM.setFastISel(SavedFastISel); + } + }; + + //===--------------------------------------------------------------------===// /// createDefaultScheduler - This creates an instruction scheduler appropriate /// for the target. ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetLowering &TLI = IS->getTargetLowering(); + const TargetLowering *TLI = IS->getTargetLowering(); const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>(); - if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() || - TLI.getSchedulingPreference() == Sched::Source) + if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() || + TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::RegPressure) + if (TLI->getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::Hybrid) + if (TLI->getSchedulingPreference() == Sched::Hybrid) return createHybridListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::VLIW) + if (TLI->getSchedulingPreference() == Sched::VLIW) return createVLIWDAGScheduler(IS, OptLevel); - assert(TLI.getSchedulingPreference() == Sched::ILP && + assert(TLI->getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); } @@ -275,10 +314,10 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : - MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), - FuncInfo(new FunctionLoweringInfo(TLI)), + MachineFunctionPass(ID), TM(tm), + FuncInfo(new FunctionLoweringInfo(TM)), CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), @@ -355,6 +394,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const Function &Fn = *mf.getFunction(); const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetLowering *TLI = TM.getTargetLowering(); MF = &mf; RegInfo = &MF->getRegInfo(); @@ -368,11 +408,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { ST.resetSubtargetFeatures(MF); 
TM.resetTargetOptions(MF); + // Reset OptLevel to None for optnone functions. + CodeGenOpt::Level NewOptLevel = OptLevel; + if (Fn.hasFnAttribute(Attribute::OptimizeNone)) + NewOptLevel = CodeGenOpt::None; + OptLevelChanger OLC(*this, NewOptLevel); + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); - CurDAG->init(*MF, TTI); + CurDAG->init(*MF, TTI, TLI); FuncInfo->set(Fn, *MF); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -401,29 +447,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Insert DBG_VALUE instructions for function arguments to the entry block. for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; - unsigned Reg = MI->getOperand(0).getReg(); + bool hasFI = MI->getOperand(0).isFI(); + unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); - MachineBasicBlock::iterator InsertPos = Def; - // FIXME: VR def may not be in entry block. - Def->getParent()->insert(llvm::next(InsertPos), MI); + if (Def) { + MachineBasicBlock::iterator InsertPos = Def; + // FIXME: VR def may not be in entry block. + Def->getParent()->insert(llvm::next(InsertPos), MI); + } else + DEBUG(dbgs() << "Dropping debug info for dead vreg" + << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg); if (LDI != LiveInMap.end()) { + assert(!hasFI && "There's no handling of frame pointer updating here yet " + "- add if needed"); MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getOperand(MI->getNumOperands()-1).getMetadata(); - unsigned Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->isIndirectDebugValue(); + unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; // Def is never a terminator here, so it is ok to increment InsertPos. BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(LDI->second, RegState::Debug) - .addImm(Offset).addMetadata(Variable); + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, + LDI->second, Offset, Variable); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -442,9 +496,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (CopyUseMI) { MachineInstr *NewMI = BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) - .addImm(Offset).addMetadata(Variable); + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, + CopyUseMI->getOperand(0).getReg(), + Offset, Variable); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); } @@ -491,6 +546,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (J == E) break; To = J->second; } + // Make sure the new register has a sufficiently constrained register class. + if (TargetRegisterInfo::isVirtualRegister(From) && + TargetRegisterInfo::isVirtualRegister(To)) + MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. 
MRI.replaceRegWith(From, To); } @@ -611,6 +670,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + CurDAG->NewNodesMustHaveLegalTypes = true; + if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); @@ -624,6 +685,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + } { @@ -790,9 +852,6 @@ void SelectionDAGISel::DoInstructionSelection() { continue; // Replace node. if (ResNode) { - // Propagate ordering - CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node)); - ReplaceUses(Node, ResNode); } @@ -827,12 +886,13 @@ void SelectionDAGISel::PrepareEHLandingPad() { .addSym(Label); // Mark exception register as live in. - const TargetRegisterClass *PtrRC = TLI.getRegClassFor(TLI.getPointerTy()); - if (unsigned Reg = TLI.getExceptionPointerRegister()) + const TargetLowering *TLI = getTargetLowering(); + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + if (unsigned Reg = TLI->getExceptionPointerRegister()) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. - if (unsigned Reg = TLI.getExceptionSelectorRegister()) + if (unsigned Reg = TLI->getExceptionSelectorRegister()) FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); } @@ -932,7 +992,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (TM.Options.EnableFastISel) - FastIS = TLI.createFastISel(*FuncInfo, LibInfo); + FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. ReversePostOrderTraversal<const Function*> RPOT(&Fn); @@ -1135,6 +1195,91 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { delete FastIS; SDB->clearDanglingDebugInfo(); + SDB->SPDescriptor.resetPerFunctionState(); +} + +/// Given that the input MI is before a partial terminator sequence TSeq, return +/// true if MI + TSeq is also a partial terminator sequence. +/// +/// A Terminator sequence is a sequence of MachineInstrs which at this point in +/// lowering copy vregs into physical registers, which are then passed into +/// terminator instructions so we can satisfy ABI constraints. A partial +/// terminator sequence is an improper subset of a terminator sequence (i.e. it +/// may be the whole terminator sequence). +static bool MIIsInTerminatorSequence(const MachineInstr *MI) { + // If we do not have a copy or an implicit def, we return true if and only if + // MI is a debug value. + if (!MI->isCopy() && !MI->isImplicitDef()) + // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the + // physical registers if there is debug info associated with the terminator + // of our mbb. We want to include said debug info in our terminator + // sequence, so we return true in that case. + return MI->isDebugValue(); + + // We have left the terminator sequence if we are not doing one of the + // following: + // + // 1. Copying a vreg into a physical register. + // 2. Copying a vreg into a vreg. + // 3. Defining a register via an implicit def. + + // OPI should always be a register definition... 
+ MachineInstr::const_mop_iterator OPI = MI->operands_begin(); + if (!OPI->isReg() || !OPI->isDef()) + return false; + + // Defining any register via an implicit def is always ok. + if (MI->isImplicitDef()) + return true; + + // Grab the copy source... + MachineInstr::const_mop_iterator OPI2 = OPI; + ++OPI2; + assert(OPI2 != MI->operands_end() + && "Should have a copy implying we should have 2 arguments."); + + // Make sure that the copy dest is not a vreg when the copy source is a + // physical register. + if (!OPI2->isReg() || + (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && + TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) + return false; + + return true; +} + +/// Find the split point at which to splice the end of BB into its success stack +/// protector check machine basic block. +/// +/// On many platforms, due to ABI constraints, terminators, even before register +/// allocation, use physical registers. This creates an issue for us since +/// physical registers at this point can not travel across basic +/// blocks. Luckily, selectiondag always moves physical registers into vregs +/// when they enter functions and moves them through a sequence of copies back +/// into the physical registers right before the terminator creating a +/// ``Terminator Sequence''. This function is searching for the beginning of the +/// terminator sequence so that we can ensure that we splice off not just the +/// terminator, but additionally the copies that move the vregs into the +/// physical registers. +static MachineBasicBlock::iterator +FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) { + MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); + // + if (SplitPoint == BB->begin()) + return SplitPoint; + + MachineBasicBlock::iterator Start = BB->begin(); + MachineBasicBlock::iterator Previous = SplitPoint; + --Previous; + + while (MIIsInTerminatorSequence(Previous)) { + SplitPoint = Previous; + if (Previous == Start) + break; + --Previous; + } + + return SplitPoint; } void @@ -1147,11 +1292,13 @@ SelectionDAGISel::FinishBasicBlock() { << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); + const bool MustUpdatePHINodes = SDB->SwitchCases.empty() && + SDB->JTCases.empty() && + SDB->BitTestCases.empty(); + // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. - if (SDB->SwitchCases.empty() && - SDB->JTCases.empty() && - SDB->BitTestCases.empty()) { + if (MustUpdatePHINodes) { for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); assert(PHI->isPHI() && @@ -1160,9 +1307,54 @@ SelectionDAGISel::FinishBasicBlock() { continue; PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } - return; } + // Handle stack protector. + if (SDB->SPDescriptor.shouldEmitStackProtector()) { + MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); + MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB(); + + // Find the split point to split the parent mbb. At the same time copy all + // physical registers used in the tail of parent mbb into virtual registers + // before the split point and back into physical registers after the split + // point. This prevents us needing to deal with Live-ins and many other + // register allocation issues caused by us splitting the parent mbb. The + // register allocator will clean up said virtual copies later on. 
+ MachineBasicBlock::iterator SplitPoint = + FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc()); + + // Splice the terminator of ParentMBB into SuccessMBB. + SuccessMBB->splice(SuccessMBB->end(), ParentMBB, + SplitPoint, + ParentMBB->end()); + + // Add compare/jump on neq/jump to the parent BB. + FuncInfo->MBB = ParentMBB; + FuncInfo->InsertPt = ParentMBB->end(); + SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // CodeGen Failure MBB if we have not codegened it yet. + MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); + if (!FailureMBB->size()) { + FuncInfo->MBB = FailureMBB; + FuncInfo->InsertPt = FailureMBB->end(); + SDB->visitSPDescriptorFailure(SDB->SPDescriptor); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Clear the Per-BB State. + SDB->SPDescriptor.resetPerBBState(); + } + + // If we updated PHI Nodes, return early. + if (MustUpdatePHINodes) + return; + for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { @@ -1609,7 +1801,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { SelectInlineAsmMemoryOperands(Ops); EVT VTs[] = { MVT::Other, MVT::Glue }; - SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, &Ops[0], Ops.size()); New->setNodeId(-1); return New.getNode(); @@ -1881,10 +2073,9 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, } } - SDValue Res; if (InputChains.size() == 1) return InputChains[0]; - return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(), + return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), MVT::Other, &InputChains[0], InputChains.size()); } @@ -1957,6 +2148,18 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N == RecordedNodes[RecNo].first; } +/// CheckChildSame - Implements OP_CheckChildXSame. +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo), + RecordedNodes); +} + /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, @@ -1981,24 +2184,23 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering &TLI) { + SDValue N, const TargetLowering *TLI) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (N.getValueType() == VT) return true; // Handle the case when VT is iPTR. 
- return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy(); + return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering &TLI, + SDValue N, const TargetLowering *TLI, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) return false; // Match fails if out of range child #. return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); } - LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { @@ -2008,13 +2210,13 @@ CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering &TLI) { + SDValue N, const TargetLowering *TLI) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (cast<VTSDNode>(N)->getVT() == VT) return true; // Handle the case when VT is iPTR. - return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy(); + return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2072,6 +2274,13 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckSame: Result = !::CheckSame(Table, Index, N, RecordedNodes); return Index; + case SelectionDAGISel::OPC_CheckChild0Same: + case SelectionDAGISel::OPC_CheckChild1Same: + case SelectionDAGISel::OPC_CheckChild2Same: + case SelectionDAGISel::OPC_CheckChild3Same: + Result = !::CheckChildSame(Table, Index, N, RecordedNodes, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same); + return Index; case SelectionDAGISel::OPC_CheckPatternPredicate: Result = !::CheckPatternPredicate(Table, Index, SDISel); return Index; @@ -2082,7 +2291,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Result = !::CheckOpcode(Table, Index, N.getNode()); return Index; case SelectionDAGISel::OPC_CheckType: - Result = !::CheckType(Table, Index, N, SDISel.TLI); + Result = !::CheckType(Table, Index, N, SDISel.getTargetLowering()); return Index; case SelectionDAGISel::OPC_CheckChild0Type: case SelectionDAGISel::OPC_CheckChild1Type: @@ -2092,14 +2301,14 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckChild5Type: case SelectionDAGISel::OPC_CheckChild6Type: case SelectionDAGISel::OPC_CheckChild7Type: - Result = !::CheckChildType(Table, Index, N, SDISel.TLI, + Result = !::CheckChildType(Table, Index, N, SDISel.getTargetLowering(), Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type); return Index; case SelectionDAGISel::OPC_CheckCondCode: Result = !::CheckCondCode(Table, Index, N); return Index; case SelectionDAGISel::OPC_CheckValueType: - Result = !::CheckValueType(Table, Index, N, SDISel.TLI); + Result = !::CheckValueType(Table, Index, N, SDISel.getTargetLowering()); return Index; case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); @@ -2369,6 +2578,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckSame: if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; continue; + + case OPC_CheckChild0Same: case OPC_CheckChild1Same: + case OPC_CheckChild2Same: case OPC_CheckChild3Same: + if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes, + 
Opcode-OPC_CheckChild0Same)) + break; + continue; + case OPC_CheckPatternPredicate: if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; continue; @@ -2392,7 +2609,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; case OPC_CheckType: - if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; + if (!::CheckType(MatcherTable, MatcherIndex, N, getTargetLowering())) + break; continue; case OPC_SwitchOpcode: { @@ -2427,7 +2645,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } case OPC_SwitchType: { - MVT CurNodeVT = N.getValueType().getSimpleVT(); + MVT CurNodeVT = N.getSimpleValueType(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; unsigned CaseSize; while (1) { @@ -2439,7 +2657,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) - CaseVT = TLI.getPointerTy(); + CaseVT = getTargetLowering()->getPointerTy(); // If the VT matches, then we will execute this case. if (CurNodeVT == CaseVT) @@ -2461,7 +2679,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckChild2Type: case OPC_CheckChild3Type: case OPC_CheckChild4Type: case OPC_CheckChild5Type: case OPC_CheckChild6Type: case OPC_CheckChild7Type: - if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, + if (!::CheckChildType(MatcherTable, MatcherIndex, N, getTargetLowering(), Opcode-OPC_CheckChild0Type)) break; continue; @@ -2469,7 +2687,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; continue; case OPC_CheckValueType: - if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break; + if (!::CheckValueType(MatcherTable, MatcherIndex, N, getTargetLowering())) + break; continue; case OPC_CheckInteger: if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; @@ -2538,7 +2757,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget"); SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { @@ -2563,7 +2782,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Read all of the chained nodes. unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); // FIXME: What if other value results of the node have uses not matched @@ -2600,7 +2819,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Read all of the chained nodes. 
for (unsigned i = 0; i != NumChains; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); // FIXME: What if other value results of the node have uses not matched @@ -2627,13 +2846,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitCopyToReg: { unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; if (InputChain.getNode() == 0) InputChain = CurDAG->getEntryNode(); - InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(), + InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch), DestPhysReg, RecordedNodes[RecNo].first, InputGlue); @@ -2644,7 +2863,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitNodeXForm: { unsigned XFormNo = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm"); SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0)); continue; @@ -2661,7 +2880,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0; i != NumVTs; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; - if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy; + if (VT == MVT::iPTR) VT = getTargetLowering()->getPointerTy().SimpleTy; VTs.push_back(VT); } @@ -2720,7 +2939,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Opcode != OPC_MorphNodeTo) { // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. - Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), + Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch), VTList, Ops); // Add all the non-glue/non-chain results to the RecordedNodes list. 
@@ -2763,8 +2982,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, bool mayStore = MCID.mayStore(); unsigned NumMemRefs = 0; - for (SmallVector<MachineMemOperand*, 2>::const_iterator I = - MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { if ((*I)->isLoad()) { if (mayLoad) ++NumMemRefs; @@ -2780,8 +2999,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MF->allocateMemRefsArray(NumMemRefs); MachineSDNode::mmo_iterator MemRefsPos = MemRefs; - for (SmallVector<MachineMemOperand*, 2>::const_iterator I = - MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { if ((*I)->isLoad()) { if (mayLoad) *MemRefsPos++ = *I; @@ -2821,7 +3040,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (RecNo & 128) RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults"); GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); } continue; @@ -2838,7 +3057,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (ResSlot & 128) ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); - assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); + assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch"); SDValue Res = RecordedNodes[ResSlot].first; assert(i < NodeToMatch->getNumValues() && diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f5fc66c..82b068d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -64,13 +64,29 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return isUsedByReturnOnly(Node, Chain); } +/// \brief Set CallLoweringInfo attribute flags based on a call instruction +/// and called function attributes. +void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, + unsigned AttrIdx) { + isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); + isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); + isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); + isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); + isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); + isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + Alignment = CS->getParamAlignment(AttrIdx); +} /// Generate a libcall taking the given operands as arguments and returning a /// result of type RetVT. 
-SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, - RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, - bool isSigned, DebugLoc dl) const { +std::pair<SDValue, SDValue> +TargetLowering::makeLibCall(SelectionDAG &DAG, + RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, SDLoc dl, + bool doesNotReturn, + bool isReturnValueUsed) const { TargetLowering::ArgListTy Args; Args.reserve(NumOps); @@ -89,11 +105,9 @@ SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI); - - return CallInfo.first; + doesNotReturn, isReturnValueUsed, Callee, Args, + DAG, dl); + return LowerCallTo(CLI); } @@ -102,7 +116,7 @@ SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - DebugLoc dl) const { + SDLoc dl) const { assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) && "Unsupported setcc type!"); @@ -183,14 +197,18 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparisons lib calls. EVT RetVT = getCmpLibcallReturnType(); SDValue Ops[2] = { NewLHS, NewRHS }; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, + dl).first; NewRHS = DAG.getConstant(0, RetVT); CCCode = getCmpLibcallCC(LC1); if (LC2 != RTLIB::UNKNOWN_LIBCALL) { - SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), + SDValue Tmp = DAG.getNode(ISD::SETCC, dl, + getSetCCResultType(*DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl); - NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), NewLHS, + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, + dl).first; + NewLHS = DAG.getNode(ISD::SETCC, dl, + getSetCCResultType(*DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); NewRHS = SDValue(); @@ -262,7 +280,7 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// constant and return true. bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // FIXME: ISD::SELECT, ISD::SELECT_CC switch (Op.getOpcode()) { @@ -302,7 +320,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, - DebugLoc dl) { + SDLoc dl) { assert(Op.getNumOperands() == 2 && "ShrinkDemandedOp only supports binary operators!"); assert(Op.getNode()->getNumValues() == 1 && @@ -356,7 +374,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth && "Mask size mismatches value type size!"); APInt NewMask = DemandedMask; - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Don't know anything. KnownZero = KnownOne = APInt(BitWidth, 0); @@ -508,7 +526,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // into an AND, as we know the bits will be cleared. // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 // NB: it is okay if more bits are known than are requested - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side if (KnownOne == KnownOne2) { // set bits are the same on both sides EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); @@ -630,6 +648,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), NarrowShl)); } + // Repeat the SHL optimization above in cases where an extension + // intervenes: (shl (anyext (shr x, c1)), c2) to + // (shl (anyext x), c2-c1). This requires that the bottom c1 bits + // aren't demanded (as above) and that the shifted upper c1 bits of + // x aren't demanded. + if (InOp.hasOneUse() && + InnerOp.getOpcode() == ISD::SRL && + InnerOp.hasOneUse() && + isa<ConstantSDNode>(InnerOp.getOperand(1))) { + uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1)) + ->getZExtValue(); + if (InnerShAmt < ShAmt && + InnerShAmt < InnerBits && + NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 && + NewMask.trunc(ShAmt) == 0) { + SDValue NewSA = + TLO.DAG.getConstant(ShAmt - InnerShAmt, + Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, + InnerOp.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, + NewExt, NewSA)); + } + } } KnownZero <<= SA->getZExtValue(); @@ -720,13 +763,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), Op.getOperand(1))); - } else if (KnownOne.intersects(SignBit)) { // New bits are known one. - KnownOne |= HighBits; + + int Log2 = NewMask.exactLogBase2(); + if (Log2 >= 0) { + // The bit must come from the sign. + SDValue NewSA = + TLO.DAG.getConstant(BitWidth - 1 - Log2, + Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + Op.getOperand(0), NewSA)); } + + if (KnownOne.intersects(SignBit)) + // New bits are known one. + KnownOne |= HighBits; } break; case ISD::SIGN_EXTEND_INREG: { @@ -1066,7 +1120,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, - DAGCombinerInfo &DCI, DebugLoc dl) const { + DAGCombinerInfo &DCI, SDLoc dl) const { SelectionDAG &DAG = DCI.DAG; // These setcc operations always fold. @@ -1075,13 +1129,20 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETFALSE: case ISD::SETFALSE2: return DAG.getConstant(0, VT); case ISD::SETTRUE: - case ISD::SETTRUE2: return DAG.getConstant(1, VT); + case ISD::SETTRUE2: { + TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector()); + return DAG.getConstant( + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + } } // Ensure that the constant occurs on the RHS, and fold constant // comparisons. 
- if (isa<ConstantSDNode>(N0.getNode())) - return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); + if (isa<ConstantSDNode>(N0.getNode()) && + (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) + return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -1160,7 +1221,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Make sure we're not losing bits from the constant. - if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) { + if (MinBits > 0 && + MinBits < C1.getBitWidth() && MinBits >= C1.getActiveBits()) { EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { // Will get folded away. @@ -1175,6 +1237,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. if (DCI.isBeforeLegalize() && + !ISD::isSignedIntSetCC(Cond) && N0.getOpcode() == ISD::AND && C1 == 0 && N0.getNode()->hasOneUse() && isa<LoadSDNode>(N0.getOperand(0)) && @@ -1319,7 +1382,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType().isInteger()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } if ((N0.getOpcode() == ISD::XOR || @@ -1756,16 +1821,22 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { if (ValueHasExactlyOneBitSet(N1, DAG)) { Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - SDValue Zero = DAG.getConstant(0, N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, Zero, Cond); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N0.getSimpleValueType())) { + SDValue Zero = DAG.getConstant(0, N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, Zero, Cond); + } } } if (N1.getOpcode() == ISD::AND) if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { if (ValueHasExactlyOneBitSet(N0, DAG)) { Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - SDValue Zero = DAG.getConstant(0, N0.getValueType()); - return DAG.getSetCC(dl, VT, N1, Zero, Cond); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N1.getSimpleValueType())) { + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N1, Zero, Cond); + } } } } @@ -1966,7 +2037,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, int64_t Offs = GA->getOffset(); if (C) Offs += C->getZExtValue(); Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), - C ? C->getDebugLoc() : DebugLoc(), + C ? 
SDLoc(C) : SDLoc(), Op.getValueType(), Offs)); return; } @@ -1989,8 +2060,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - if (Constraint[0] != '{') + MVT VT) const { + if (Constraint.empty() || Constraint[0] != '{') return std::make_pair(0u, static_cast<TargetRegisterClass*>(0)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); @@ -2139,8 +2210,9 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( break; } } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { - OpInfo.ConstraintVT = MVT::getIntegerVT( - 8*getDataLayout()->getPointerSize(PT->getAddressSpace())); + unsigned PtrSize + = getDataLayout()->getPointerSizeInBits(PT->getAddressSpace()); + OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize); } else { OpInfo.ConstraintVT = MVT::getVT(OpTy, true); } @@ -2435,9 +2507,9 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, } } -/// BuildExactDiv - Given an exact SDIV by a constant, create a multiplication +/// \brief Given an exact SDIV by a constant, create a multiplication /// with the multiplicative inverse of the constant. -SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, +SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, SelectionDAG &DAG) const { ConstantSDNode *C = cast<ConstantSDNode>(Op2); APInt d = C->getAPIntValue(); @@ -2461,7 +2533,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); } -/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, +/// \brief Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> @@ -2469,7 +2541,7 @@ SDValue TargetLowering:: BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, std::vector<SDNode*> *Created) const { EVT VT = N->getValueType(0); - DebugLoc dl= N->getDebugLoc(); + SDLoc dl(N); // Check to see if we can do this. // FIXME: We should be more aggressive here. @@ -2521,7 +2593,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, return DAG.getNode(ISD::ADD, dl, VT, Q, T); } -/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, +/// \brief Given an ISD::UDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> @@ -2529,7 +2601,7 @@ SDValue TargetLowering:: BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, std::vector<SDNode*> *Created) const { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Check to see if we can do this. // FIXME: We should be more aggressive here. 
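BuildExactSDIV above turns an exact signed division by a constant into a multiply: any power-of-two factor is shifted out first, and the remaining odd factor d is replaced by its multiplicative inverse mod 2^N, since for exact multiples x/d == x * d^-1 (mod 2^N). A standalone sketch of the arithmetic in plain C++ (assumptions: 32-bit int, arithmetic right shift for the negative operand, and inverseMod2_32 is our own helper name, not an LLVM API):

#include <cassert>
#include <cstdint>

// Multiplicative inverse of an odd d modulo 2^32 via Newton's iteration;
// each step doubles the number of correct low bits.
static uint32_t inverseMod2_32(uint32_t d) {
  uint32_t x = d; // d*d == 1 (mod 8), so x starts correct to 3 bits
  for (int i = 0; i < 5; ++i)
    x *= 2 - d * x;
  return x;
}

int main() {
  int32_t d = 24;     // 24 = 2^3 * 3
  int32_t x = -696;   // an exact multiple of 24
  int32_t q = x >> 3; // shift out the power-of-two factor (exact here)
  q = (int32_t)((uint32_t)q * inverseMod2_32(3)); // multiply by 3^-1 mod 2^32
  assert(q == x / d); // -29
  return 0;
}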
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
deleted file mode 100644
index 2feea59..0000000
--- a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
+++ /dev/null
@@ -1,1152 +0,0 @@
-//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a shrink wrapping variant of prolog/epilog insertion:
-// - Spills and restores of callee-saved registers (CSRs) are placed in the
-//   machine CFG to tightly surround their uses so that execution paths that
-//   do not use CSRs do not pay the spill/restore penalty.
-//
-// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
-//   loop the spills are placed in the loop preheader, and restores are
-//   placed in the loop exit nodes (the successors of loop _exiting_ nodes).
-//
-// - Covering paths without CSR uses:
-//   If a region in a CFG uses CSRs and has multiple entry and/or exit points,
-//   the use info for the CSRs inside the region is propagated outward in the
-//   CFG to ensure validity of the spill/restore placements. This decreases
-//   the effectiveness of shrink wrapping but does not require edge splitting
-//   in the machine CFG.
-//
-// This shrink wrapping implementation uses an iterative analysis to determine
-// which basic blocks require spills and restores for CSRs.
-//
-// This pass uses MachineDominators and MachineLoopInfo. Loop information
-// is used to prevent placement of callee-saved register spills/restores
-// in the bodies of loops.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "shrink-wrap"
-
-#include "PrologEpilogInserter.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include <sstream>
-
-using namespace llvm;
-
-STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
-
-// Shrink Wrapping:
-static cl::opt<bool>
-ShrinkWrapping("shrink-wrap",
-               cl::desc("Shrink wrap callee-saved register spills/restores"));
-
-// Shrink wrap only the specified function, a debugging aid.
-static cl::opt<std::string>
-ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
-               cl::desc("Shrink wrap the specified function"),
-               cl::value_desc("funcname"),
-               cl::init(""));
-
-// Debugging level for shrink wrapping.
-enum ShrinkWrapDebugLevel { - Disabled, BasicInfo, Iterations, Details -}; - -static cl::opt<enum ShrinkWrapDebugLevel> -ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, - cl::desc("Print shrink wrapping debugging information"), - cl::values( - clEnumVal(Disabled , "disable debug output"), - clEnumVal(BasicInfo , "print basic DF sets"), - clEnumVal(Iterations, "print SR sets for each iteration"), - clEnumVal(Details , "print all DF sets"), - clEnumValEnd)); - - -void PEI::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - if (ShrinkWrapping || ShrinkWrapFunc != "") { - AU.addRequired<MachineLoopInfo>(); - AU.addRequired<MachineDominatorTree>(); - } - AU.addPreserved<MachineLoopInfo>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<TargetPassConfig>(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -//===----------------------------------------------------------------------===// -// ShrinkWrapping implementation -//===----------------------------------------------------------------------===// - -// Convienences for dealing with machine loops. -MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) { - assert(LP && "Machine loop is NULL."); - MachineBasicBlock* PHDR = LP->getLoopPreheader(); - MachineLoop* PLP = LP->getParentLoop(); - while (PLP) { - PHDR = PLP->getLoopPreheader(); - PLP = PLP->getParentLoop(); - } - return PHDR; -} - -MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { - if (LP == 0) - return 0; - MachineLoop* PLP = LP->getParentLoop(); - while (PLP) { - LP = PLP; - PLP = PLP->getParentLoop(); - } - return LP; -} - -bool PEI::isReturnBlock(MachineBasicBlock* MBB) { - return (MBB && !MBB->empty() && MBB->back().isReturn()); -} - -// Initialize shrink wrapping DFA sets, called before iterations. -void PEI::clearAnticAvailSets() { - AnticIn.clear(); - AnticOut.clear(); - AvailIn.clear(); - AvailOut.clear(); -} - -// Clear all sets constructed by shrink wrapping. -void PEI::clearAllSets() { - ReturnBlocks.clear(); - clearAnticAvailSets(); - UsedCSRegs.clear(); - CSRUsed.clear(); - TLLoops.clear(); - CSRSave.clear(); - CSRRestore.clear(); -} - -// Initialize all shrink wrapping data. -void PEI::initShrinkWrappingInfo() { - clearAllSets(); - EntryBlock = 0; -#ifndef NDEBUG - HasFastExitPath = false; -#endif - ShrinkWrapThisFunction = ShrinkWrapping; - // DEBUG: enable or disable shrink wrapping for the current function - // via --shrink-wrap-func=<funcname>. -#ifndef NDEBUG - if (ShrinkWrapFunc != "") { - std::string MFName = MF->getName().str(); - ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); - } -#endif -} - - -/// placeCSRSpillsAndRestores - determine which MBBs of the function -/// need save, restore code for callee-saved registers by doing a DF analysis -/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs -/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo -/// is used to ensure that CSR save/restore code is not placed inside loops. -/// This function computes the maps of MBBs -> CSRs to spill and restore -/// in CSRSave, CSRRestore. -/// -/// If shrink wrapping is not being performed, place all spills in -/// the entry block, all restores in return blocks. In this case, -/// CSRSave has a single mapping, CSRRestore has mappings for each -/// return block. 
-/// -void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { - - DEBUG(MF = &Fn); - - initShrinkWrappingInfo(); - - DEBUG(if (ShrinkWrapThisFunction) { - dbgs() << "Place CSR spills/restores for " - << MF->getName() << "\n"; - }); - - if (calculateSets(Fn)) - placeSpillsAndRestores(Fn); -} - -/// calcAnticInOut - calculate the anticipated in/out reg sets -/// for the given MBB by looking forward in the MCFG at MBB's -/// successors. -/// -bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { - bool changed = false; - - // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) - SmallVector<MachineBasicBlock*, 4> successors; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC != MBB) - successors.push_back(SUCC); - } - - unsigned i = 0, e = successors.size(); - if (i != e) { - CSRegSet prevAnticOut = AnticOut[MBB]; - MachineBasicBlock* SUCC = successors[i]; - - AnticOut[MBB] = AnticIn[SUCC]; - for (++i; i != e; ++i) { - SUCC = successors[i]; - AnticOut[MBB] &= AnticIn[SUCC]; - } - if (prevAnticOut != AnticOut[MBB]) - changed = true; - } - - // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); - CSRegSet prevAnticIn = AnticIn[MBB]; - AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; - if (prevAnticIn != AnticIn[MBB]) - changed = true; - return changed; -} - -/// calcAvailInOut - calculate the available in/out reg sets -/// for the given MBB by looking backward in the MCFG at MBB's -/// predecessors. -/// -bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { - bool changed = false; - - // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) - SmallVector<MachineBasicBlock*, 4> predecessors; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED != MBB) - predecessors.push_back(PRED); - } - - unsigned i = 0, e = predecessors.size(); - if (i != e) { - CSRegSet prevAvailIn = AvailIn[MBB]; - MachineBasicBlock* PRED = predecessors[i]; - - AvailIn[MBB] = AvailOut[PRED]; - for (++i; i != e; ++i) { - PRED = predecessors[i]; - AvailIn[MBB] &= AvailOut[PRED]; - } - if (prevAvailIn != AvailIn[MBB]) - changed = true; - } - - // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); - CSRegSet prevAvailOut = AvailOut[MBB]; - AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; - if (prevAvailOut != AvailOut[MBB]) - changed = true; - return changed; -} - -/// calculateAnticAvail - build the sets anticipated and available -/// registers in the MCFG of the current function iteratively, -/// doing a combined forward and backward analysis. -/// -void PEI::calculateAnticAvail(MachineFunction &Fn) { - // Initialize data flow sets. - clearAnticAvailSets(); - - // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. - bool changed = true; - unsigned iterations = 0; - while (changed) { - changed = false; - ++iterations; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - - // Calculate anticipated in, out regs at MBB from - // anticipated at successors of MBB. - changed |= calcAnticInOut(MBB); - - // Calculate available in, out regs at MBB from - // available at predecessors of MBB. 
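The two transfer functions just shown (AnticOut = intersection of successors' AnticIn, AnticIn = Used | AnticOut; and, symmetrically, AvailIn/AvailOut over predecessors, which the loop below computes next) form a small bidirectional fixed-point problem. The following is a compact standalone restatement (a toy in plain C++, not the pass itself) over a four-block diamond CFG, entry -> {b1, b2} -> ret, where only b1 uses a callee-saved register:

#include <cstdio>
#include <vector>
using Set = unsigned; // bit i == CSR i

int main() {
  const int N = 4; // 0 = entry, 1 = b1, 2 = b2, 3 = ret
  std::vector<std::vector<int>> succ = {{1, 2}, {3}, {3}, {}};
  std::vector<std::vector<int>> pred = {{}, {0}, {0}, {1, 2}};
  Set used[N] = {0, 1, 0, 0}; // CSR 0 is used only in b1
  Set anticIn[N] = {0}, anticOut[N] = {0}, availIn[N] = {0}, availOut[N] = {0};

  for (bool changed = true; changed;) {
    changed = false;
    for (int b = 0; b < N; ++b) {
      // AnticOut[B] = INTERSECT(AnticIn[S]) over successors S.
      Set ao = succ[b].empty() ? 0 : ~0u;
      for (int s : succ[b]) ao &= anticIn[s];
      Set ai = used[b] | ao; // AnticIn = Used U AnticOut
      // AvailIn[B] = INTERSECT(AvailOut[P]) over predecessors P.
      Set vi = pred[b].empty() ? 0 : ~0u;
      for (int p : pred[b]) vi &= availOut[p];
      Set vo = used[b] | vi; // AvailOut = Used U AvailIn
      if (ao != anticOut[b] || ai != anticIn[b] || vi != availIn[b] ||
          vo != availOut[b])
        changed = true;
      anticOut[b] = ao; anticIn[b] = ai; availIn[b] = vi; availOut[b] = vo;
    }
  }
  for (int b = 0; b < N; ++b)
    std::printf("b%d antic=%u/%u avail=%u/%u\n", b, anticIn[b], anticOut[b],
                availIn[b], availOut[b]);
  return 0;
}

At the fixed point, AnticIn and AvailOut are nonzero only for b1: the register is neither anticipated at entry (the b2 arm never needs it) nor available at the return, which is exactly what the placement equations further down exploit.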
- changed |= calcAvailInOut(MBB); - } - } - - DEBUG({ - if (ShrinkWrapDebugging >= Details) { - dbgs() - << "-----------------------------------------------------------\n" - << " Antic/Avail Sets:\n" - << "-----------------------------------------------------------\n" - << "iterations = " << iterations << "\n" - << "-----------------------------------------------------------\n" - << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" - << "-----------------------------------------------------------\n"; - - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets(MBB); - } - - dbgs() - << "-----------------------------------------------------------\n"; - } - }); -} - -/// propagateUsesAroundLoop - copy used register info from MBB to all blocks -/// of the loop given by LP and its parent loops. This prevents spills/restores -/// from being placed in the bodies of loops. -/// -void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { - if (! MBB || !LP) - return; - - std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); - for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { - MachineBasicBlock* LBB = loopBlocks[i]; - if (LBB == MBB) - continue; - if (CSRUsed[LBB].contains(CSRUsed[MBB])) - continue; - CSRUsed[LBB] |= CSRUsed[MBB]; - } -} - -/// calculateSets - collect the CSRs used in this function, compute -/// the DF sets that describe the initial minimal regions in the -/// Machine CFG around which CSR spills and restores must be placed. -/// -/// Additionally, this function decides if shrink wrapping should -/// be disabled for the current function, checking the following: -/// 1. the current function has more than 500 MBBs: heuristic limit -/// on function size to reduce compile time impact of the current -/// iterative algorithm. -/// 2. all CSRs are used in the entry block. -/// 3. all CSRs are used in all immediate successors of the entry block. -/// 4. all CSRs are used in a subset of blocks, each of which dominates -/// all return blocks. These blocks, taken as a subgraph of the MCFG, -/// are equivalent to the entry block since all execution paths pass -/// through them. -/// -bool PEI::calculateSets(MachineFunction &Fn) { - // Sets used to compute spill, restore placement sets. - const std::vector<CalleeSavedInfo> CSI = - Fn.getFrameInfo()->getCalleeSavedInfo(); - - // If no CSRs used, we are done. - if (CSI.empty()) { - DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getName() - << ": uses no callee-saved registers\n"); - return false; - } - - // Save refs to entry and return blocks. - EntryBlock = Fn.begin(); - for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); - MBB != E; ++MBB) - if (isReturnBlock(MBB)) - ReturnBlocks.push_back(MBB); - - // Determine if this function has fast exit paths. - DEBUG(if (ShrinkWrapThisFunction) - findFastExitPath()); - - // Limit shrink wrapping via the current iterative bit vector - // implementation to functions with <= 500 MBBs. - if (Fn.size() > 500) { - DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getName() - << ": too large (" << Fn.size() << " MBBs)\n"); - ShrinkWrapThisFunction = false; - } - - // Return now if not shrink wrapping. - if (! ShrinkWrapThisFunction) - return false; - - // Collect set of used CSRs. 
- for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { - UsedCSRegs.set(inx); - } - - // Walk instructions in all MBBs, create CSRUsed[] sets, choose - // whether or not to shrink wrap this function. - MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); - MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); - - bool allCSRUsesInEntryBlock = true; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { - for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { - unsigned Reg = CSI[inx].getReg(); - // If instruction I reads or modifies Reg, add it to UsedCSRegs, - // CSRUsed map for the current block. - for (unsigned opInx = 0, opEnd = I->getNumOperands(); - opInx != opEnd; ++opInx) { - const MachineOperand &MO = I->getOperand(opInx); - if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) - continue; - unsigned MOReg = MO.getReg(); - if (!MOReg) - continue; - if (MOReg == Reg || - (TargetRegisterInfo::isPhysicalRegister(MOReg) && - TargetRegisterInfo::isPhysicalRegister(Reg) && - TRI->isSubRegister(Reg, MOReg))) { - // CSR Reg is defined/used in block MBB. - CSRUsed[MBB].set(inx); - // Check for uses in EntryBlock. - if (MBB != EntryBlock) - allCSRUsesInEntryBlock = false; - } - } - } - } - - if (CSRUsed[MBB].empty()) - continue; - - // Propagate CSRUsed[MBB] in loops - if (MachineLoop* LP = LI.getLoopFor(MBB)) { - // Add top level loop to work list. - MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); - MachineLoop* PLP = getTopLevelLoopParent(LP); - - if (! HDR) { - HDR = PLP->getHeader(); - assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); - MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); - HDR = *PI; - } - TLLoops[HDR] = PLP; - - // Push uses from inside loop to its parent loops, - // or to all other MBBs in its loop. - if (LP->getLoopDepth() > 1) { - for (MachineLoop* PLP = LP->getParentLoop(); PLP; - PLP = PLP->getParentLoop()) { - propagateUsesAroundLoop(MBB, PLP); - } - } else { - propagateUsesAroundLoop(MBB, LP); - } - } - } - - if (allCSRUsesInEntryBlock) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in EntryBlock\n"); - ShrinkWrapThisFunction = false; - } else { - bool allCSRsUsedInEntryFanout = true; - for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), - SE = EntryBlock->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (CSRUsed[SUCC] != UsedCSRegs) - allCSRsUsedInEntryFanout = false; - } - if (allCSRsUsedInEntryFanout) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in imm successors of EntryBlock\n"); - ShrinkWrapThisFunction = false; - } - } - - if (ShrinkWrapThisFunction) { - // Check if MBB uses CSRs and dominates all exit nodes. - // Such nodes are equiv. to the entry node w.r.t. - // CSR uses: every path through the function must - // pass through this node. If each CSR is used at least - // once by these nodes, shrink wrapping is disabled. - CSRegSet CSRUsedInChokePoints; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) - continue; - bool dominatesExitNodes = true; - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) - if (! 
DT.dominates(MBB, ReturnBlocks[ri])) { - dominatesExitNodes = false; - break; - } - if (dominatesExitNodes) { - CSRUsedInChokePoints |= CSRUsed[MBB]; - if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in choke point(s) at " - << getBasicBlockName(MBB) << "\n"); - ShrinkWrapThisFunction = false; - break; - } - } - } - } - - // Return now if we have decided not to apply shrink wrapping - // to the current function. - if (! ShrinkWrapThisFunction) - return false; - - DEBUG({ - dbgs() << "ENABLED: " << Fn.getName(); - if (HasFastExitPath) - dbgs() << " (fast exit path)"; - dbgs() << "\n"; - if (ShrinkWrapDebugging >= BasicInfo) { - dbgs() << "------------------------------" - << "-----------------------------\n"; - dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; - if (ShrinkWrapDebugging >= Details) { - dbgs() << "------------------------------" - << "-----------------------------\n"; - dumpAllUsed(); - } - } - }); - - // Build initial DF sets to determine minimal regions in the - // Machine CFG around which CSRs must be spilled and restored. - calculateAnticAvail(Fn); - - return true; -} - -/// addUsesForMEMERegion - add uses of CSRs spilled or restored in -/// multi-entry, multi-exit (MEME) regions so spill and restore -/// placement will not break code that enters or leaves a -/// shrink-wrapped region by inducing spills with no matching -/// restores or restores with no matching spills. A MEME region -/// is a subgraph of the MCFG with multiple entry edges, multiple -/// exit edges, or both. This code propagates use information -/// through the MCFG until all paths requiring spills and restores -/// _outside_ the computed minimal placement regions have been covered. -/// -bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4>& blks) { - if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { - bool processThisBlock = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC->pred_size() > 1) { - processThisBlock = true; - break; - } - } - if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED->succ_size() > 1) { - processThisBlock = true; - break; - } - } - } - if (! processThisBlock) - return false; - } - - CSRegSet prop; - if (!CSRSave[MBB].empty()) - prop = CSRSave[MBB]; - else if (!CSRRestore[MBB].empty()) - prop = CSRRestore[MBB]; - else - prop = CSRUsed[MBB]; - if (prop.empty()) - return false; - - // Propagate selected bits to successors, predecessors of MBB. - bool addedUses = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - // Self-loop - if (SUCC == MBB) - continue; - if (! CSRUsed[SUCC].contains(prop)) { - CSRUsed[SUCC] |= prop; - addedUses = true; - blks.push_back(SUCC); - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(prop) << ")->" - << "successor " << getBasicBlockName(SUCC) << "\n"); - } - } - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - // Self-loop - if (PRED == MBB) - continue; - if (! 
CSRUsed[PRED].contains(prop)) { - CSRUsed[PRED] |= prop; - addedUses = true; - blks.push_back(PRED); - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(prop) << ")->" - << "predecessor " << getBasicBlockName(PRED) << "\n"); - } - } - return addedUses; -} - -/// addUsesForTopLevelLoops - add uses for CSRs used inside top -/// level loops to the exit blocks of those loops. -/// -bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { - bool addedUses = false; - - // Place restores for top level loops where needed. - for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator - I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { - MachineBasicBlock* MBB = I->first; - MachineLoop* LP = I->second; - MachineBasicBlock* HDR = LP->getHeader(); - SmallVector<MachineBasicBlock*, 4> exitBlocks; - CSRegSet loopSpills; - - loopSpills = CSRSave[MBB]; - if (CSRSave[MBB].empty()) { - loopSpills = CSRUsed[HDR]; - assert(!loopSpills.empty() && "No CSRs used in loop?"); - } else if (CSRRestore[MBB].contains(CSRSave[MBB])) - continue; - - LP->getExitBlocks(exitBlocks); - assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); - for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { - MachineBasicBlock* EXB = exitBlocks[i]; - if (! CSRUsed[EXB].contains(loopSpills)) { - CSRUsed[EXB] |= loopSpills; - addedUses = true; - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << "LOOP " << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(loopSpills) << ")->" - << getBasicBlockName(EXB) << "\n"); - if (EXB->succ_size() > 1 || EXB->pred_size() > 1) - blks.push_back(EXB); - } - } - } - return addedUses; -} - -/// calcSpillPlacements - determine which CSRs should be spilled -/// in MBB using AnticIn sets of MBB's predecessors, keeping track -/// of changes to spilled reg sets. Add MBB to the set of blocks -/// that need to be processed for propagating use info to cover -/// multi-entry/exit regions. -/// -bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, - CSRegBlockMap &prevSpills) { - bool placedSpills = false; - // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) - CSRegSet anticInPreds; - SmallVector<MachineBasicBlock*, 4> predecessors; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED != MBB) - predecessors.push_back(PRED); - } - unsigned i = 0, e = predecessors.size(); - if (i != e) { - MachineBasicBlock* PRED = predecessors[i]; - anticInPreds = UsedCSRegs - AnticIn[PRED]; - for (++i; i != e; ++i) { - PRED = predecessors[i]; - anticInPreds &= (UsedCSRegs - AnticIn[PRED]); - } - } else { - // Handle uses in entry blocks (which have no predecessors). - // This is necessary because the DFA formulation assumes the - // entry and (multiple) exit nodes cannot have CSR uses, which - // is not the case in the real world. - anticInPreds = UsedCSRegs; - } - // Compute spills required at MBB: - CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; - - if (! CSRSave[MBB].empty()) { - if (MBB == EntryBlock) { - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) - CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; - } else { - // Reset all regs spilled in MBB that are also spilled in EntryBlock. 
- if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { - CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; - } - } - } - placedSpills = (CSRSave[MBB] != prevSpills[MBB]); - prevSpills[MBB] = CSRSave[MBB]; - // Remember this block for adding restores to successor - // blocks for multi-entry region. - if (placedSpills) - blks.push_back(MBB); - - DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) - dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]) << "\n"); - - return placedSpills; -} - -/// calcRestorePlacements - determine which CSRs should be restored -/// in MBB using AvailOut sets of MBB's succcessors, keeping track -/// of changes to restored reg sets. Add MBB to the set of blocks -/// that need to be processed for propagating use info to cover -/// multi-entry/exit regions. -/// -bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, - CSRegBlockMap &prevRestores) { - bool placedRestores = false; - // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) - CSRegSet availOutSucc; - SmallVector<MachineBasicBlock*, 4> successors; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC != MBB) - successors.push_back(SUCC); - } - unsigned i = 0, e = successors.size(); - if (i != e) { - MachineBasicBlock* SUCC = successors[i]; - availOutSucc = UsedCSRegs - AvailOut[SUCC]; - for (++i; i != e; ++i) { - SUCC = successors[i]; - availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); - } - } else { - if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { - // Handle uses in return blocks (which have no successors). - // This is necessary because the DFA formulation assumes the - // entry and (multiple) exit nodes cannot have CSR uses, which - // is not the case in the real world. - availOutSucc = UsedCSRegs; - } - } - // Compute restores required at MBB: - CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; - - // Postprocess restore placements at MBB. - // Remove the CSRs that are restored in the return blocks. - // Lest this be confusing, note that: - // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. - if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { - if (! CSRSave[EntryBlock].empty()) - CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; - } - placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); - prevRestores[MBB] = CSRRestore[MBB]; - // Remember this block for adding saves to predecessor - // blocks for multi-entry region. - if (placedRestores) - blks.push_back(MBB); - - DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) - dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); - - return placedRestores; -} - -/// placeSpillsAndRestores - place spills and restores of CSRs -/// used in MBBs in minimal regions that contain the uses. -/// -void PEI::placeSpillsAndRestores(MachineFunction &Fn) { - CSRegBlockMap prevCSRSave; - CSRegBlockMap prevCSRRestore; - SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks; - bool changed = true; - unsigned iterations = 0; - - // Iterate computation of spill and restore placements in the MCFG until: - // 1. CSR use info has been fully propagated around the MCFG, and - // 2. computation of CSRSave[], CSRRestore[] reach fixed points. 
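To make the two placement equations concrete, apply them to the diamond toy sketched earlier (entry -> {b1, b2} -> ret, CSR r0 used only in b1, so AnticIn[b1] = AvailOut[b1] = {r0} and every other set is empty). For b1: anticInPreds = UsedCSRegs - AnticIn[entry] = {r0}, so CSRSave[b1] = (AnticIn[b1] - AvailIn[b1]) & anticInPreds = {r0}; and availOutSucc = UsedCSRegs - AvailOut[ret] = {r0}, so CSRRestore[b1] = (AvailOut[b1] - AnticOut[b1]) & availOutSucc = {r0}. At the entry block AnticIn is empty, so the save equation yields nothing there: both the spill and the restore land in b1, tightly wrapping the single use, which is the behavior the file header promises.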
- while (changed) { - changed = false; - ++iterations; - - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << "iter " << iterations - << " --------------------------------------------------\n"); - - // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, - // which determines the placements of spills and restores. - // Keep track of changes to spills, restores in each iteration to - // minimize the total iterations. - bool SRChanged = false; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - - // Place spills for CSRs in MBB. - SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); - - // Place restores for CSRs in MBB. - SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); - } - - // Add uses of CSRs used inside loops where needed. - changed |= addUsesForTopLevelLoops(cvBlocks); - - // Add uses for CSRs spilled or restored at branch, join points. - if (changed || SRChanged) { - while (! cvBlocks.empty()) { - MachineBasicBlock* MBB = cvBlocks.pop_back_val(); - changed |= addUsesForMEMERegion(MBB, ncvBlocks); - } - if (! ncvBlocks.empty()) { - cvBlocks = ncvBlocks; - ncvBlocks.clear(); - } - } - - if (changed) { - calculateAnticAvail(Fn); - CSRSave.clear(); - CSRRestore.clear(); - } - } - - // Check for effectiveness: - // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} - // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] - // Gives a measure of how many CSR spills have been moved from EntryBlock - // to minimal regions enclosing their uses. - CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); - unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); - numSRReduced += numSRReducedThisFunc; - DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { - dbgs() << "-----------------------------------------------------------\n"; - dbgs() << "total iterations = " << iterations << " ( " - << Fn.getName() - << " " << numSRReducedThisFunc - << " " << Fn.size() - << " )\n"; - dbgs() << "-----------------------------------------------------------\n"; - dumpSRSets(); - dbgs() << "-----------------------------------------------------------\n"; - if (numSRReducedThisFunc) - verifySpillRestorePlacement(); - }); -} - -// Debugging methods. -#ifndef NDEBUG -/// findFastExitPath - debugging method used to detect functions -/// with at least one path from the entry block to a return block -/// directly or which has a very small number of edges. -/// -void PEI::findFastExitPath() { - if (! EntryBlock) - return; - // Fina a path from EntryBlock to any return block that does not branch: - // Entry - // | ... - // v | - // B1<-----+ - // | - // v - // Return - for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), - SE = EntryBlock->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - - // Assume positive, disprove existence of fast path. - HasFastExitPath = true; - - // Check the immediate successors. - if (isReturnBlock(SUCC)) { - if (ShrinkWrapDebugging >= BasicInfo) - dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) - << "->" << getBasicBlockName(SUCC) << "\n"; - break; - } - // Traverse df from SUCC, look for a branch block. - std::string exitPath = getBasicBlockName(SUCC); - for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC), - BE = df_end(SUCC); BI != BE; ++BI) { - MachineBasicBlock* SBB = *BI; - // Reject paths with branch nodes. 
- if (SBB->succ_size() > 1) { - HasFastExitPath = false; - break; - } - exitPath += "->" + getBasicBlockName(SBB); - } - if (HasFastExitPath) { - if (ShrinkWrapDebugging >= BasicInfo) - dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) - << "->" << exitPath << "\n"; - break; - } - } -} - -/// verifySpillRestorePlacement - check the current spill/restore -/// sets for safety. Attempt to find spills without restores or -/// restores without spills. -/// Spills: walk df from each MBB in spill set ensuring that -/// all CSRs spilled at MMBB are restored on all paths -/// from MBB to all exit blocks. -/// Restores: walk idf from each MBB in restore set ensuring that -/// all CSRs restored at MBB are spilled on all paths -/// reaching MBB. -/// -void PEI::verifySpillRestorePlacement() { - unsigned numReturnBlocks = 0; - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - if (isReturnBlock(MBB) || MBB->succ_size() == 0) - ++numReturnBlocks; - } - for (CSRegBlockMap::iterator BI = CSRSave.begin(), - BE = CSRSave.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet spilled = BI->second; - CSRegSet restored; - - if (spilled.empty()) - continue; - - DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(spilled) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); - - if (CSRRestore[MBB].intersects(spilled)) { - restored |= (CSRRestore[MBB] & spilled); - } - - // Walk depth first from MBB to find restores of all CSRs spilled at MBB: - // we must find restores for all spills w/no intervening spills on all - // paths from MBB to all return blocks. - for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB), - BE = df_end(MBB); BI != BE; ++BI) { - MachineBasicBlock* SBB = *BI; - if (SBB == MBB) - continue; - // Stop when we encounter spills of any CSRs spilled at MBB that - // have not yet been seen to be restored. - if (CSRSave[SBB].intersects(spilled) && - !restored.contains(CSRSave[SBB] & spilled)) - break; - // Collect the CSRs spilled at MBB that are restored - // at this DF successor of MBB. - if (CSRRestore[SBB].intersects(spilled)) - restored |= (CSRRestore[SBB] & spilled); - // If we are at a retun block, check that the restores - // we have seen so far exhaust the spills at MBB, then - // reset the restores. - if (isReturnBlock(SBB) || SBB->succ_size() == 0) { - if (restored != spilled) { - CSRegSet notRestored = (spilled - restored); - DEBUG(dbgs() << MF->getName() << ": " - << stringifyCSRegSet(notRestored) - << " spilled at " << getBasicBlockName(MBB) - << " are never restored on path to return " - << getBasicBlockName(SBB) << "\n"); - } - restored.clear(); - } - } - } - - // Check restore placements. 
- for (CSRegBlockMap::iterator BI = CSRRestore.begin(), - BE = CSRRestore.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet restored = BI->second; - CSRegSet spilled; - - if (restored.empty()) - continue; - - DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(restored) << "\n"); - - if (CSRSave[MBB].intersects(restored)) { - spilled |= (CSRSave[MBB] & restored); - } - // Walk inverse depth first from MBB to find spills of all - // CSRs restored at MBB: - for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB), - BE = idf_end(MBB); BI != BE; ++BI) { - MachineBasicBlock* PBB = *BI; - if (PBB == MBB) - continue; - // Stop when we encounter restores of any CSRs restored at MBB that - // have not yet been seen to be spilled. - if (CSRRestore[PBB].intersects(restored) && - !spilled.contains(CSRRestore[PBB] & restored)) - break; - // Collect the CSRs restored at MBB that are spilled - // at this DF predecessor of MBB. - if (CSRSave[PBB].intersects(restored)) - spilled |= (CSRSave[PBB] & restored); - } - if (spilled != restored) { - CSRegSet notSpilled = (restored - spilled); - DEBUG(dbgs() << MF->getName() << ": " - << stringifyCSRegSet(notSpilled) - << " restored at " << getBasicBlockName(MBB) - << " are never spilled\n"); - } - } -} - -// Debugging print methods. -std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { - if (!MBB) - return ""; - - if (MBB->getBasicBlock()) - return MBB->getBasicBlock()->getName().str(); - - std::ostringstream name; - name << "_MBB_" << MBB->getNumber(); - return name.str(); -} - -std::string PEI::stringifyCSRegSet(const CSRegSet& s) { - const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); - const std::vector<CalleeSavedInfo> CSI = - MF->getFrameInfo()->getCalleeSavedInfo(); - - std::ostringstream srep; - if (CSI.size() == 0) { - srep << "[]"; - return srep.str(); - } - srep << "["; - CSRegSet::iterator I = s.begin(), E = s.end(); - if (I != E) { - unsigned reg = CSI[*I].getReg(); - srep << TRI->getName(reg); - for (++I; I != E; ++I) { - reg = CSI[*I].getReg(); - srep << ","; - srep << TRI->getName(reg); - } - } - srep << "]"; - return srep.str(); -} - -void PEI::dumpSet(const CSRegSet& s) { - DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); -} - -void PEI::dumpUsed(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; - }); -} - -void PEI::dumpAllUsed() { - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpUsed(MBB); - } -} - -void PEI::dumpSets(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << "\n"; - }); -} - -void PEI::dumpSets1(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << " | " - << stringifyCSRegSet(CSRSave[MBB]) << " | " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - }); -} - 
-void PEI::dumpAllSets() { - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets1(MBB); - } -} - -void PEI::dumpSRSets() { - DEBUG({ - for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); - MBB != E; ++MBB) { - if (!CSRSave[MBB].empty()) { - dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]); - if (CSRRestore[MBB].empty()) - dbgs() << '\n'; - } - - if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) - dbgs() << " " - << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - } - }); -} -#endif diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 3903743..da2e710 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -42,48 +42,47 @@ STATISTIC(NumInvokes, "Number of invokes replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { - class SjLjEHPrepare : public FunctionPass { - const TargetLoweringBase *TLI; - Type *FunctionContextTy; - Constant *RegisterFn; - Constant *UnregisterFn; - Constant *BuiltinSetjmpFn; - Constant *FrameAddrFn; - Constant *StackAddrFn; - Constant *StackRestoreFn; - Constant *LSDAAddrFn; - Value *PersonalityFn; - Constant *CallSiteFn; - Constant *FuncCtxFn; - AllocaInst *FuncCtx; - public: - static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL) - : FunctionPass(ID), TLI(tli) { } - bool doInitialization(Module &M); - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const {} - const char *getPassName() const { - return "SJLJ Exception Handling preparation"; - } +class SjLjEHPrepare : public FunctionPass { + const TargetMachine *TM; + Type *FunctionContextTy; + Constant *RegisterFn; + Constant *UnregisterFn; + Constant *BuiltinSetjmpFn; + Constant *FrameAddrFn; + Constant *StackAddrFn; + Constant *StackRestoreFn; + Constant *LSDAAddrFn; + Value *PersonalityFn; + Constant *CallSiteFn; + Constant *FuncCtxFn; + AllocaInst *FuncCtx; + +public: + static char ID; // Pass identification, replacement for typeid + explicit SjLjEHPrepare(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {} + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const {} + const char *getPassName() const { + return "SJLJ Exception Handling preparation"; + } - private: - bool setupEntryBlockAndCallSites(Function &F); - void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, - Value *SelVal); - Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads); - void lowerIncomingArguments(Function &F); - void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes); - void insertCallSiteStore(Instruction *I, int Number); - }; +private: + bool setupEntryBlockAndCallSites(Function &F); + void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal); + Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads); + void lowerIncomingArguments(Function &F); + void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes); + void insertCallSiteStore(Instruction *I, int Number); +}; } // end anonymous namespace char SjLjEHPrepare::ID = 0; // Public Interface To the SjLjEHPrepare pass. 
-FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) {
-  return new SjLjEHPrepare(TLI);
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) {
+  return new SjLjEHPrepare(TM);
 }
 
 // doInitialization - Set up declarations and types needed to process
 // exceptions.
@@ -92,23 +91,19 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
   // builtin_setjmp uses a five word jbuf
   Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
   Type *Int32Ty = Type::getInt32Ty(M.getContext());
-  FunctionContextTy =
-    StructType::get(VoidPtrTy,                    // __prev
-                    Int32Ty,                      // call_site
-                    ArrayType::get(Int32Ty, 4),   // __data
-                    VoidPtrTy,                    // __personality
-                    VoidPtrTy,                    // __lsda
-                    ArrayType::get(VoidPtrTy, 5), // __jbuf
-                    NULL);
-  RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
-                                     Type::getVoidTy(M.getContext()),
-                                     PointerType::getUnqual(FunctionContextTy),
-                                     (Type *)0);
-  UnregisterFn =
-    M.getOrInsertFunction("_Unwind_SjLj_Unregister",
-                          Type::getVoidTy(M.getContext()),
-                          PointerType::getUnqual(FunctionContextTy),
-                          (Type *)0);
+  FunctionContextTy = StructType::get(VoidPtrTy,                  // __prev
+                                      Int32Ty,                    // call_site
+                                      ArrayType::get(Int32Ty, 4), // __data
+                                      VoidPtrTy,                  // __personality
+                                      VoidPtrTy,                  // __lsda
+                                      ArrayType::get(VoidPtrTy, 5), // __jbuf
+                                      NULL);
+  RegisterFn = M.getOrInsertFunction(
+      "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
+      PointerType::getUnqual(FunctionContextTy), (Type *)0);
+  UnregisterFn = M.getOrInsertFunction(
+      "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
+      PointerType::getUnqual(FunctionContextTy), (Type *)0);
   FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
   StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
   StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
@@ -134,16 +129,17 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
   Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site");
 
   // Insert a store of the call-site number
-  ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
-                                              Number);
-  Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/);
+  ConstantInt *CallSiteNoC =
+      ConstantInt::get(Type::getInt32Ty(I->getContext()), Number);
+  Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/);
 }
 
 /// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
 /// we reach blocks we've already seen.
 static void MarkBlocksLiveIn(BasicBlock *BB,
-                             SmallPtrSet<BasicBlock*, 64> &LiveBBs) {
-  if (!LiveBBs.insert(BB)) return;  // already been here.
+                             SmallPtrSet<BasicBlock *, 64> &LiveBBs) {
+  if (!LiveBBs.insert(BB))
+    return; // already been here.
 
   for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
     MarkBlocksLiveIn(*PI, LiveBBs);
@@ -153,12 +149,14 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
 /// instruction with those returned by the personality function.
void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal) { - SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); + SmallVector<Value *, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); while (!UseWorkList.empty()) { Value *Val = UseWorkList.pop_back_val(); ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val); - if (!EVI) continue; - if (EVI->getNumIndices() != 1) continue; + if (!EVI) + continue; + if (EVI->getNumIndices() != 1) + continue; if (*EVI->idx_begin() == 0) EVI->replaceAllUsesWith(ExnVal); else if (*EVI->idx_begin() == 1) @@ -167,14 +165,15 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, EVI->eraseFromParent(); } - if (LPI->getNumUses() == 0) return; + if (LPI->getNumUses() == 0) + return; // There are still some uses of LPI. Construct an aggregate with the exception // values and replace the LPI with that aggregate. Type *LPadType = LPI->getType(); Value *LPadVal = UndefValue::get(LPadType); - IRBuilder<> - Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + IRBuilder<> Builder( + llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); @@ -183,17 +182,18 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, /// setupFunctionContext - Allocate the function context on the stack and fill /// it with all of the data that we know at this point. -Value *SjLjEHPrepare:: -setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { +Value *SjLjEHPrepare::setupFunctionContext(Function &F, + ArrayRef<LandingPadInst *> LPads) { BasicBlock *EntryBB = F.begin(); // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. + const TargetLowering *TLI = TM->getTargetLowering(); unsigned Align = - TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); - FuncCtx = - new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); + TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); + FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context", + EntryBB->begin()); // Fill in the function context structure. for (unsigned I = 0, E = LPads.size(); I != E; ++I) { @@ -204,13 +204,13 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. 
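Before the loads that the comment above introduces, it may help to see the FunctionContextTy built in doInitialization spelled out as a plain-C layout. The field names follow the comments in the patch; the struct name SjLjFunctionContext is ours, and this is only an illustration, not a header the runtime actually ships:

#include <cstdint>

struct SjLjFunctionContext {
  SjLjFunctionContext *prev; // __prev: chain of registered contexts
  int32_t call_site;         // call_site: number stored before each invoke
  int32_t data[4];           // __data: data[0] = exception, data[1] = selector
  void *personality;         // __personality: personality function pointer
  void *lsda;                // __lsda: language-specific data area
  void *jbuf[5];             // __jbuf: five-word builtin_setjmp buffer
};

The two GEPs that follow index __data[0] and __data[1] of exactly this layout to recover the exception value and selector after a longjmp-style unwind.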
- Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0, - "exception_gep"); + Value *ExceptionAddr = + Builder.CreateConstGEP2_32(FCData, 0, 0, "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1, - "exn_selector_gep"); + Value *SelectorAddr = + Builder.CreateConstGEP2_32(FCData, 0, 1, "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); @@ -220,9 +220,11 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, - "pers_fn_gep"); - Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true); + Value *PersonalityFieldPtr = + Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep"); + Builder.CreateStore( + Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()), + PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); @@ -242,8 +244,8 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) ++AfterAllocaInsPt; - for (Function::arg_iterator - AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; + ++AI) { Type *Ty = AI->getType(); // Aggregate types can't be cast, but are legal argument types, so we have @@ -262,9 +264,8 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { // This is always a no-op cast because we're casting AI to AI->getType() // so src and destination types are identical. BitCast is the only // possibility. - CastInst *NC = - new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", - AfterAllocaInsPt); + CastInst *NC = new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", + AfterAllocaInsPt); AI->replaceAllUsesWith(NC); // Set the operand of the cast instruction back to the AllocaInst. @@ -281,20 +282,21 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { /// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, - ArrayRef<InvokeInst*> Invokes) { + ArrayRef<InvokeInst *> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. - for (Function::iterator - BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { - for (BasicBlock::iterator - II = BB->begin(), IIE = BB->end(); II != IIE; ++II) { + for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { + for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE; + ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; - if (Inst->use_empty()) continue; + if (Inst->use_empty()) + continue; if (Inst->hasOneUse() && cast<Instruction>(Inst->use_back())->getParent() == BB && - !isa<PHINode>(Inst->use_back())) continue; + !isa<PHINode>(Inst->use_back())) + continue; // If this is an alloca in the entry block, it's not a real register // value. 
@@ -303,16 +305,16 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, continue; // Avoid iterator invalidation by copying users to a temporary vector. - SmallVector<Instruction*, 16> Users; - for (Value::use_iterator - UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { + SmallVector<Instruction *, 16> Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); if (User->getParent() != BB || isa<PHINode>(User)) Users.push_back(User); } // Find all of the blocks that this value is live in. - SmallPtrSet<BasicBlock*, 64> LiveBBs; + SmallPtrSet<BasicBlock *, 64> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); @@ -336,7 +338,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " - << UnwindBlock->getName() << "\n"); + << UnwindBlock->getName() << "\n"); NeedsSpill = true; break; } @@ -359,15 +361,16 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); // Place PHIs into a set to avoid invalidating the iterator. - SmallPtrSet<PHINode*, 8> PHIsToDemote; - for (BasicBlock::iterator - PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) + SmallPtrSet<PHINode *, 8> PHIsToDemote; + for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) PHIsToDemote.insert(cast<PHINode>(PN)); - if (PHIsToDemote.empty()) continue; + if (PHIsToDemote.empty()) + continue; // Demote the PHIs to the stack. - for (SmallPtrSet<PHINode*, 8>::iterator - I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I) + for (SmallPtrSet<PHINode *, 8>::iterator I = PHIsToDemote.begin(), + E = PHIsToDemote.end(); + I != E; ++I) DemotePHIToStack(*I); // Move the landingpad instruction back to the top of the landing pad block. @@ -379,9 +382,9 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { - SmallVector<ReturnInst*, 16> Returns; - SmallVector<InvokeInst*, 16> Invokes; - SmallSetVector<LandingPadInst*, 16> LPads; + SmallVector<ReturnInst *, 16> Returns; + SmallVector<InvokeInst *, 16> Invokes; + SmallSetVector<LandingPadInst *, 16> LPads; // Look through the terminators of the basic blocks to find invokes. 
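The scan continued below decides, per value, whether it is live across an unwind edge; the core of that test is the backwards flood fill MarkBlocksLiveIn shown earlier. A toy standalone version of the same walk (plain C++, blocks as integers, hand-written predecessor lists; markLiveIn is our rename of the pass's helper):

#include <set>
#include <vector>

static void markLiveIn(int bb, const std::vector<std::vector<int>> &preds,
                       std::set<int> &liveBBs) {
  if (!liveBBs.insert(bb).second)
    return; // already been here
  for (int p : preds[bb])
    markLiveIn(p, preds, liveBBs);
}

int main() {
  // CFG: 0 -> 1 -> 3 and 0 -> 2 -> 3; the value is defined in 0, used in 3.
  std::vector<std::vector<int>> preds = {{}, {0}, {0}, {1, 2}};
  std::set<int> liveBBs = {0}; // seed with the defining block
  markLiveIn(3, preds, liveBBs);
  // If an invoke's unwind destination (say block 2) lands in the live set,
  // the value lives across an unwind edge and must be spilled.
  int unwindDest = 2;
  bool needsSpill = liveBBs.count(unwindDest) != 0; // true here
  return needsSpill ? 0 : 1;
}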
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -401,7 +404,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Returns.push_back(RI); } - if (Invokes.empty()) return false; + if (Invokes.empty()) + return false; NumInvokes += Invokes.size(); @@ -409,7 +413,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { lowerAcrossUnwindEdges(F, Invokes); Value *FuncCtx = - setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); + setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); IRBuilder<> Builder(EntryBB->getTerminator()); @@ -443,7 +447,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { insertCallSiteStore(Invokes[I], I + 1); ConstantInt *CallSiteNum = - ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); + ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); // Record the call site value for the back end so it stays associated with // the invoke. @@ -465,8 +469,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { } // Register the function context and make sure it's known to not throw - CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "", - EntryBB->getTerminator()); + CallInst *Register = + CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator()); Register->setDoesNotThrow(); // Following any allocas not in the entry block, update the saved SP in the diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp index c5bbba3..10a93b7 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -31,8 +31,8 @@ #include "SpillPlacement.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/EdgeBundles.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" @@ -53,11 +53,16 @@ char &llvm::SpillPlacementID = SpillPlacement::ID; void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequiredTransitive<EdgeBundles>(); AU.addRequiredTransitive<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } +/// Decision threshold. A node gets the output value 0 if the weighted sum of +/// its inputs falls in the open interval (-Threshold;Threshold). +static const BlockFrequency Threshold = 2; + /// Node - Each edge bundle corresponds to a Hopfield node. /// /// The node contains precomputed frequency data that only depends on the CFG, @@ -68,31 +73,25 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { /// because all weights are positive. /// struct SpillPlacement::Node { - /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle. - /// Ideally, these two numbers should be identical, but inaccuracies in the - /// block frequency estimates means that we need to normalize ingoing and - /// outgoing frequencies separately so they are commensurate. - float Scale[2]; - - /// Bias - Normalized contributions from non-transparent blocks. - /// A bundle connected to a MustSpill block has a huge negative bias, - /// otherwise it is a number in the range [-2;2]. - float Bias; + /// BiasN - Sum of blocks that prefer a spill. + BlockFrequency BiasN; + /// BiasP - Sum of blocks that prefer a register. 
+  BlockFrequency BiasP;
 
   /// Value - Output value of this node computed from the Bias and links.
-  /// This is always in the range [-1;1]. A positive number means the variable
-  /// should go in a register through this bundle.
-  float Value;
+  /// This is always one of the values {-1, 0, 1}. A positive number means the
+  /// variable should go in a register through this bundle.
+  int Value;
 
-  typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+  typedef SmallVector<std::pair<BlockFrequency, unsigned>, 4> LinkVector;
 
   /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
-  /// bundles. The weights are all positive and add up to at most 2, weights
-  /// from ingoing and outgoing nodes separately add up to a most 1. The weight
-  /// sum can be less than 2 when the variable is not live into / out of some
-  /// connected basic blocks.
+  /// bundles. The weights are all positive block frequencies.
   LinkVector Links;
 
+  /// SumLinkWeights - Cached sum of the weights of all links + Threshold.
+  BlockFrequency SumLinkWeights;
+
   /// preferReg - Return true when this node prefers to be in a register.
   bool preferReg() const {
     // Undecided nodes (Value==0) go on the stack.
@@ -101,28 +100,24 @@ struct SpillPlacement::Node {
 
   /// mustSpill - Return True if this node is so biased that it must spill.
   bool mustSpill() const {
-    // Actually, we must spill if Bias < sum(weights).
-    // It may be worth it to compute the weight sum here?
-    return Bias < -2.0f;
-  }
-
-  /// Node - Create a blank Node.
-  Node() {
-    Scale[0] = Scale[1] = 0;
+    // We must spill if Bias < -sum(weights) or the MustSpill flag was set.
+    // BiasN is saturated when MustSpill is set, make sure this still returns
+    // true when the RHS saturates. Note that SumLinkWeights includes Threshold.
+    return BiasN >= BiasP + SumLinkWeights;
   }
 
   /// clear - Reset per-query data, but preserve frequencies that only depend on
   // the CFG.
   void clear() {
-    Bias = Value = 0;
+    BiasN = BiasP = Value = 0;
+    SumLinkWeights = Threshold;
     Links.clear();
   }
 
   /// addLink - Add a link to bundle b with weight w.
-  /// out=0 for an ingoing link, and 1 for an outgoing link.
-  void addLink(unsigned b, float w, bool out) {
-    // Normalize w relative to all connected blocks from that direction.
-    w *= Scale[out];
+  void addLink(unsigned b, BlockFrequency w) {
+    // Update cached sum.
+    SumLinkWeights += w;
 
     // There can be multiple links to the same bundle, add them up.
     for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
@@ -134,33 +129,48 @@ struct SpillPlacement::Node {
     Links.push_back(std::make_pair(w, b));
   }
 
-  /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
-  /// Return the change to the total number of positive biases.
-  void addBias(float w, bool out) {
-    // Normalize w relative to all connected blocks from that direction.
-    w *= Scale[out];
-    Bias += w;
+  /// addBias - Bias this node.
+  void addBias(BlockFrequency freq, BorderConstraint direction) {
+    switch (direction) {
+    default:
+      break;
+    case PrefReg:
+      BiasP += freq;
+      break;
+    case PrefSpill:
+      BiasN += freq;
+      break;
+    case MustSpill:
+      BiasN = BlockFrequency::getMaxFrequency();
+      break;
+    }
   }
 
   /// update - Recompute Value from Bias and Links. Return true when node
   /// preference changes.
   bool update(const Node nodes[]) {
     // Compute the weighted sum of inputs.
-    float Sum = Bias;
-    for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
-      Sum += I->first * nodes[I->second].Value;
+    BlockFrequency SumN = BiasN;
+    BlockFrequency SumP = BiasP;
+    for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) {
+      if (nodes[I->second].Value == -1)
+        SumN += I->first;
+      else if (nodes[I->second].Value == 1)
+        SumP += I->first;
+    }
 
-    // The weighted sum is going to be in the range [-2;2]. Ideally, we should
-    // simply set Value = sign(Sum), but we will add a dead zone around 0 for
-    // two reasons:
+    // Each weighted sum is going to be less than the total frequency of the
+    // bundle. Ideally, we should simply set Value = sign(SumP - SumN), but we
+    // will add a dead zone around 0 for two reasons:
+    //
     // 1. It avoids arbitrary bias when all links are 0 as is possible during
     //    initial iterations.
     // 2. It helps tame rounding errors when the links nominally sum to 0.
-    const float Thres = 1e-4f;
+    //
     bool Before = preferReg();
-    if (Sum < -Thres)
+    if (SumN >= SumP + Threshold)
       Value = -1;
-    else if (Sum > Thres)
+    else if (SumP >= SumN + Threshold)
       Value = 1;
     else
       Value = 0;
@@ -177,22 +187,13 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
   nodes = new Node[bundles->getNumBundles()];
 
   // Compute total ingoing and outgoing block frequencies for all bundles.
-  BlockFrequency.resize(mf.getNumBlockIDs());
+  BlockFrequencies.resize(mf.getNumBlockIDs());
+  MachineBlockFrequencyInfo &MBFI = getAnalysis<MachineBlockFrequencyInfo>();
   for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
-    float Freq = LiveIntervals::getSpillWeight(true, false,
-                                               loops->getLoopDepth(I));
     unsigned Num = I->getNumber();
-    BlockFrequency[Num] = Freq;
-    nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq;
-    nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq;
+    BlockFrequencies[Num] = MBFI.getBlockFreq(I);
   }
 
-  // Scales are reciprocal frequencies.
-  for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i)
-    for (unsigned d = 0; d != 2; ++d)
-      if (nodes[i].Scale[d] > 0)
-        nodes[i].Scale[d] = 1 / nodes[i].Scale[d];
-
   // We never change the function.
   return false;
 }
@@ -213,12 +214,15 @@ void SpillPlacement::activate(unsigned n) {
   // landing pads, or loops with many 'continue' statements. It is difficult to
   // allocate registers when so many different blocks are involved.
   //
-  // Give a small negative bias to large bundles such that 1/32 of the
-  // connected blocks need to be interested before we consider expanding the
-  // region through the bundle. This helps compile time by limiting the number
-  // of blocks visited and the number of links in the Hopfield network.
-  if (bundles->getBlocks(n).size() > 100)
-    nodes[n].Bias = -0.0625f;
+  // Give a small negative bias to large bundles such that a substantial
+  // fraction of the connected blocks need to be interested before we consider
+  // expanding the region through the bundle. This helps compile time by
+  // limiting the number of blocks visited and the number of links in the
+  // Hopfield network.
+  if (bundles->getBlocks(n).size() > 100) {
+    nodes[n].BiasP = 0;
+    nodes[n].BiasN = (BlockFrequency::getEntryFrequency() / 16);
+  }
 }
 
 
@@ -227,27 +231,20 @@ void SpillPlacement::activate(unsigned n) {
 void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
   for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(),
        E = LiveBlocks.end(); I != E; ++I) {
-    float Freq = getBlockFrequency(I->Number);
-    const float Bias[] = {
-      0,           // DontCare,
-      1,           // PrefReg,
-      -1,          // PrefSpill
-      0,           // PrefBoth
-      -HUGE_VALF   // MustSpill
-    };
+    BlockFrequency Freq = BlockFrequencies[I->Number];
 
     // Live-in to block?
     if (I->Entry != DontCare) {
       unsigned ib = bundles->getBundle(I->Number, 0);
       activate(ib);
-      nodes[ib].addBias(Freq * Bias[I->Entry], 1);
+      nodes[ib].addBias(Freq, I->Entry);
     }
 
     // Live-out from block?
    if (I->Exit != DontCare) {
       unsigned ob = bundles->getBundle(I->Number, 1);
       activate(ob);
-      nodes[ob].addBias(Freq * Bias[I->Exit], 0);
+      nodes[ob].addBias(Freq, I->Exit);
     }
   }
 }
@@ -256,15 +253,15 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
 void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) {
   for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
        I != E; ++I) {
-    float Freq = getBlockFrequency(*I);
+    BlockFrequency Freq = BlockFrequencies[*I];
     if (Strong)
       Freq += Freq;
     unsigned ib = bundles->getBundle(*I, 0);
     unsigned ob = bundles->getBundle(*I, 1);
     activate(ib);
     activate(ob);
-    nodes[ib].addBias(-Freq, 1);
-    nodes[ob].addBias(-Freq, 0);
+    nodes[ib].addBias(Freq, PrefSpill);
+    nodes[ob].addBias(Freq, PrefSpill);
   }
 }
 
@@ -284,9 +281,9 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
       Linked.push_back(ib);
     if (nodes[ob].Links.empty() && !nodes[ob].mustSpill())
       Linked.push_back(ob);
-    float Freq = getBlockFrequency(Number);
-    nodes[ib].addLink(ob, Freq, 1);
-    nodes[ob].addLink(ib, Freq, 0);
+    BlockFrequency Freq = BlockFrequencies[Number];
+    nodes[ib].addLink(ob, Freq);
+    nodes[ob].addLink(ib, Freq);
   }
 }
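The update() and addBias() hunks above replace SpillPlacement's floating-point Hopfield arithmetic with two saturating BlockFrequency accumulators and a fixed decision threshold. A minimal sketch of that decision rule, using plain uint64_t as a stand-in for llvm::BlockFrequency (the saturating semantics and the Threshold value are simplifications assumed here, not the LLVM class):

    #include <cstdint>
    #include <utility>
    #include <vector>

    using Frequency = uint64_t;           // stand-in for llvm::BlockFrequency
    static const Frequency Threshold = 2;

    struct Node {
      Frequency BiasN = 0, BiasP = 0;     // spill / register preferences
      int Value = 0;                      // one of {-1, 0, 1}
      std::vector<std::pair<Frequency, unsigned> > Links;

      // Mirrors the dead-zone rule in the hunk above: the node stays
      // undecided (0) unless one side beats the other by Threshold.
      bool update(const std::vector<Node> &nodes) {
        Frequency SumN = BiasN, SumP = BiasP;
        for (size_t i = 0; i != Links.size(); ++i) {
          if (nodes[Links[i].second].Value == -1)
            SumN += Links[i].first;
          else if (nodes[Links[i].second].Value == 1)
            SumP += Links[i].first;
        }
        int Before = Value;
        if (SumN >= SumP + Threshold)
          Value = -1;
        else if (SumP >= SumN + Threshold)
          Value = 1;
        else
          Value = 0;
        return Value != Before;           // did the preference change?
      }
    };

Because every quantity is an unsigned frequency, the comparison form SumN >= SumP + Threshold replaces the old signed comparison against a tiny float epsilon.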
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h
index fc412f8..105516b 100644
--- a/contrib/llvm/lib/CodeGen/SpillPlacement.h
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h
@@ -30,6 +30,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/BlockFrequency.h"
 
 namespace llvm {
 
@@ -57,7 +58,7 @@ class SpillPlacement : public MachineFunctionPass {
   SmallVector<unsigned, 8> RecentPositive;
 
   // Block frequencies are computed once. Indexed by block number.
-  SmallVector<float, 4> BlockFrequency;
+  SmallVector<BlockFrequency, 4> BlockFrequencies;
 
 public:
   static char ID; // Pass identification, replacement for typeid.
@@ -139,8 +140,8 @@ public:
 
   /// getBlockFrequency - Return the estimated block execution frequency per
   /// function invocation.
-  float getBlockFrequency(unsigned Number) const {
-    return BlockFrequency[Number];
+  BlockFrequency getBlockFrequency(unsigned Number) const {
+    return BlockFrequencies[Number];
   }
 
 private:
diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp
index 209792f..d5b3a4a 100644
--- a/contrib/llvm/lib/CodeGen/Spiller.cpp
+++ b/contrib/llvm/lib/CodeGen/Spiller.cpp
@@ -77,7 +77,7 @@ protected:
 
     DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
 
-    assert(li->weight != HUGE_VALF &&
+    assert(li->weight != llvm::huge_valf &&
            "Attempting to spill already spilled value.");
 
     assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
@@ -115,15 +115,14 @@ protected:
           indices.push_back(i);
       }
 
-      // Create a new vreg & interval for this instr.
-      LiveInterval *newLI = &LRE.create();
-      newLI->weight = HUGE_VALF;
+      // Create a new virtual register for the load and/or store.
+      unsigned NewVReg = LRE.create();
 
       // Update the reg operands & kill flags.
       for (unsigned i = 0; i < indices.size(); ++i) {
         unsigned mopIdx = indices[i];
         MachineOperand &mop = mi->getOperand(mopIdx);
-        mop.setReg(newLI->reg);
+        mop.setReg(NewVReg);
         if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
           mop.setIsKill(true);
         }
@@ -133,28 +132,20 @@ protected:
       // Insert reload if necessary.
       MachineBasicBlock::iterator miItr(mi);
       if (hasUse) {
-        tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc,
+        MachineInstrSpan MIS(miItr);
+
+        tii->loadRegFromStackSlot(*mi->getParent(), miItr, NewVReg, ss, trc,
                                   tri);
-        MachineInstr *loadInstr(prior(miItr));
-        SlotIndex loadIndex =
-          lis->InsertMachineInstrInMaps(loadInstr).getRegSlot();
-        SlotIndex endIndex = loadIndex.getNextIndex();
-        VNInfo *loadVNI =
-          newLI->getNextValue(loadIndex, lis->getVNInfoAllocator());
-        newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
+        lis->InsertMachineInstrRangeInMaps(MIS.begin(), miItr);
       }
 
       // Insert store if necessary.
       if (hasDef) {
-        tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg,
+        MachineInstrSpan MIS(miItr);
+
+        tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), NewVReg,
                                  true, ss, trc, tri);
-        MachineInstr *storeInstr(llvm::next(miItr));
-        SlotIndex storeIndex =
-          lis->InsertMachineInstrInMaps(storeInstr).getRegSlot();
-        SlotIndex beginIndex = storeIndex.getPrevIndex();
-        VNInfo *storeVNI =
-          newLI->getNextValue(beginIndex, lis->getVNInfoAllocator());
-        newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
+        lis->InsertMachineInstrRangeInMaps(llvm::next(miItr), MIS.end());
       }
     }
   }
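The Spiller.cpp hunk above drops the manual SlotIndex bookkeeping in favor of MachineInstrSpan, which remembers the position just before an insertion point so that everything loadRegFromStackSlot()/storeRegToStackSlot() emits there can be handed to LiveIntervals as one range. A toy analogue of that capture idea on a std::list; InsertSpan is an illustrative name for this sketch, not the LLVM API:

    #include <iostream>
    #include <iterator>
    #include <list>
    #include <string>

    // Remember the element just before an insertion point, so everything
    // later inserted before that point can be walked as a range.
    template <typename List> class InsertSpan {
      List &L;
      typename List::iterator I; // insertion point
      typename List::iterator B; // element before it, or end() as a marker
    public:
      InsertSpan(List &l, typename List::iterator i)
          : L(l), I(i), B(i == l.begin() ? l.end() : std::prev(i)) {}
      // First element inserted before I, valid once insertions happened.
      typename List::iterator begin() {
        return B == L.end() ? L.begin() : std::next(B);
      }
      typename List::iterator end() { return I; }
    };

    int main() {
      std::list<std::string> code = {"def", "use"};
      auto use = std::next(code.begin());
      InsertSpan<std::list<std::string>> span(code, use);
      code.insert(use, "reload"); // stands in for loadRegFromStackSlot()
      for (auto it = span.begin(); it != span.end(); ++it)
        std::cout << *it << "\n"; // prints only the newly inserted "reload"
    }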
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index 0a3818e..68a15f7 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -214,7 +214,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {
 
     // When not live in, the first use should be a def.
     if (!BI.LiveIn) {
-      assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+      assert(LVI->start == LVI->valno->def && "Dangling Segment start");
       assert(LVI->start == BI.FirstInstr && "First instr should be a def");
       BI.FirstDef = BI.FirstInstr;
     }
@@ -245,8 +245,8 @@ bool SplitAnalysis::calcLiveBlockInfo() {
         BI.FirstInstr = BI.FirstDef = LVI->start;
       }
 
-      // A LiveRange that starts in the middle of the block must be a def.
-      assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+      // A Segment that starts in the middle of the block must be a def.
+      assert(LVI->start == LVI->valno->def && "Dangling Segment start");
       if (!BI.FirstDef)
         BI.FirstDef = LVI->start;
     }
@@ -325,12 +325,14 @@ void SplitAnalysis::analyze(const LiveInterval *li) {
 
 SplitEditor::SplitEditor(SplitAnalysis &sa,
                          LiveIntervals &lis,
                          VirtRegMap &vrm,
-                         MachineDominatorTree &mdt)
+                         MachineDominatorTree &mdt,
+                         MachineBlockFrequencyInfo &mbfi)
   : SA(sa), LIS(lis), VRM(vrm),
     MRI(vrm.getMachineFunction().getRegInfo()),
     MDT(mdt),
     TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
     TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
+    MBFI(mbfi),
    Edit(0),
     OpenIdx(0),
     SpillMode(SM_Partition),
@@ -375,7 +377,7 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
   assert(ParentVNI && "Mapping NULL value");
   assert(Idx.isValid() && "Invalid SlotIndex");
   assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
-  LiveInterval *LI = Edit->get(RegIdx);
+  LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
 
   // Create a new value.
   VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator());
@@ -393,14 +395,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
   // If the previous value was a simple mapping, add liveness for it now.
   if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
     SlotIndex Def = OldVNI->def;
-    LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI));
+    LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI));
     // No longer a simple mapping. Switch to a complex, non-forced mapping.
     InsP.first->second = ValueForcePair();
   }
 
   // This is a complex mapping, add liveness for VNI
   SlotIndex Def = VNI->def;
-  LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
+  LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));
 
   return VNI;
 }
@@ -420,7 +422,8 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
   // This was previously a single mapping. Make sure the old def is represented
   // by a trivial live range.
   SlotIndex Def = VNI->def;
-  Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
+  LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
+  LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));
   // Mark as complex mapped, forced.
   VFP = ValueForcePair(0, true);
 }
@@ -432,7 +435,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
                                    MachineBasicBlock::iterator I) {
   MachineInstr *CopyMI = 0;
   SlotIndex Def;
-  LiveInterval *LI = Edit->get(RegIdx);
+  LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
 
   // We may be trying to avoid interference that ends at a deleted instruction,
   // so always begin RegIdx 0 early and all others late.
@@ -460,11 +463,11 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
 unsigned SplitEditor::openIntv() {
   // Create the complement as index 0.
   if (Edit->empty())
-    Edit->create();
+    Edit->createEmptyInterval();
 
   // Create the open interval.
   OpenIdx = Edit->size();
-  Edit->create();
+  Edit->createEmptyInterval();
   return OpenIdx;
 }
 
@@ -629,7 +632,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
 //===----------------------------------------------------------------------===//
 
 void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
-  LiveInterval *LI = Edit->get(0);
+  LiveInterval *LI = &LIS.getInterval(Edit->get(0));
   DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n");
   RegAssignMap::iterator AssignI;
   AssignI.setMap(RegAssign);
@@ -728,7 +731,7 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
 void SplitEditor::hoistCopiesForSize() {
   // Get the complement interval, always RegIdx 0.
-  LiveInterval *LI = Edit->get(0);
+  LiveInterval *LI = &LIS.getInterval(Edit->get(0));
   LiveInterval *Parent = &Edit->getParent();
 
   // Track the nearest common dominator for all back-copies for each ParentVNI,
@@ -859,13 +862,13 @@ bool SplitEditor::transferValues() {
     // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
     DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx);
-    LiveInterval *LI = Edit->get(RegIdx);
+    LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
 
     // Check for a simply defined value that can be blitted directly.
     ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));
     if (VNInfo *VNI = VFP.getPointer()) {
       DEBUG(dbgs() << ':' << VNI->id);
-      LI->addRange(LiveRange(Start, End, VNI));
+      LR.addSegment(LiveInterval::Segment(Start, End, VNI));
       Start = End;
       continue;
     }
@@ -889,7 +892,7 @@ bool SplitEditor::transferValues() {
 
       // The first block may be live-in, or it may have its own def.
       if (Start != BlockStart) {
-        VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
+        VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
         assert(VNI && "Missing def for complex mapped value");
         DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
         // MBB has its own def. Is it also live-out?
@@ -909,7 +912,7 @@ bool SplitEditor::transferValues() {
         if (BlockStart == ParentVNI->def) {
           // This block has the def of a parent PHI, so it isn't live-in.
           assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
-          VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
+          VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
           assert(VNI && "Missing def for complex mapped parent PHI");
           if (End >= BlockEnd)
             LRC.setLiveOutValue(MBB, VNI); // Live-out as well.
@@ -917,10 +920,10 @@ bool SplitEditor::transferValues() {
           // This block needs a live-in value. The last block covered may not
           // be live-out.
           if (End < BlockEnd)
-            LRC.addLiveInBlock(LI, MDT[MBB], End);
+            LRC.addLiveInBlock(LR, MDT[MBB], End);
           else {
             // Live-through, and we don't know the value.
-            LRC.addLiveInBlock(LI, MDT[MBB]);
+            LRC.addLiveInBlock(LR, MDT[MBB]);
             LRC.setLiveOutValue(MBB, 0);
           }
         }
@@ -947,7 +950,7 @@ void SplitEditor::extendPHIKillRanges() {
     if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
       continue;
     unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
-    LiveInterval *LI = Edit->get(RegIdx);
+    LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
     LiveRangeCalc &LRC = getLRCalc(RegIdx);
     MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
     for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
@@ -959,7 +962,7 @@ void SplitEditor::extendPHIKillRanges() {
       if (Edit->getParent().liveAt(LastUse)) {
         assert(RegAssign.lookup(LastUse) == RegIdx &&
                "Different register assignment in phi predecessor");
-        LRC.extend(LI, End);
+        LRC.extend(LR, End);
       }
     }
   }
@@ -988,7 +991,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
 
       // Rewrite to the mapped register at Idx.
       unsigned RegIdx = RegAssign.lookup(Idx);
-      LiveInterval *LI = Edit->get(RegIdx);
+      LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
       MO.setReg(LI->reg);
       DEBUG(dbgs() << "  rewr BB#" << MI->getParent()->getNumber() << '\t'
                    << Idx << ':' << RegIdx << '\t' << *MI);
@@ -1009,14 +1012,14 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
       } else
         Idx = Idx.getRegSlot(true);
 
-      getLRCalc(RegIdx).extend(LI, Idx.getNextSlot());
+      getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot());
     }
   }
 }
 
 void SplitEditor::deleteRematVictims() {
   SmallVector<MachineInstr*, 8> Dead;
   for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
-    LiveInterval *LI = *I;
+    LiveInterval *LI = &LIS.getInterval(*I);
     for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
          LII != LIE; ++LII) {
       // Dead defs end at the dead slot.
@@ -1089,8 +1092,10 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
   deleteRematVictims();
 
   // Get rid of unused values and set phi-kill flags.
-  for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I)
-    (*I)->RenumberValues(LIS);
+  for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) {
+    LiveInterval &LI = LIS.getInterval(*I);
+    LI.RenumberValues();
+  }
 
   // Provide a reverse mapping from original indices to Edit ranges.
   if (LRMap) {
@@ -1103,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
   ConnectedVNInfoEqClasses ConEQ(LIS);
   for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
     // Don't use iterators, they are invalidated by create() below.
-    LiveInterval *li = Edit->get(i);
+    LiveInterval *li = &LIS.getInterval(Edit->get(i));
     unsigned NumComp = ConEQ.Classify(li);
     if (NumComp <= 1)
       continue;
@@ -1111,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
     SmallVector<LiveInterval*, 8> dups;
     dups.push_back(li);
     for (unsigned j = 1; j != NumComp; ++j)
-      dups.push_back(&Edit->create());
+      dups.push_back(&Edit->createEmptyInterval());
     ConEQ.Distribute(&dups[0], MRI);
     // The new intervals all map back to i.
     if (LRMap)
@@ -1119,7 +1124,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
   }
 
   // Calculate spill weight and allocation hints for new intervals.
-  Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops);
+  Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops, MBFI);
 
   assert(!LRMap || LRMap->size() == Edit->size());
 }
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index 4005a3d..f029c73 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -27,6 +27,7 @@ class ConnectedVNInfoEqClasses;
 class LiveInterval;
 class LiveIntervals;
 class LiveRangeEdit;
+class MachineBlockFrequencyInfo;
 class MachineInstr;
 class MachineLoopInfo;
 class MachineRegisterInfo;
@@ -215,6 +216,7 @@ class SplitEditor {
   MachineDominatorTree &MDT;
   const TargetInstrInfo &TII;
   const TargetRegisterInfo &TRI;
+  const MachineBlockFrequencyInfo &MBFI;
 
 public:
 
@@ -349,7 +351,7 @@ public:
   /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
   /// Newly created intervals will be appended to newIntervals.
   SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
-              MachineDominatorTree&);
+              MachineDominatorTree&, MachineBlockFrequencyInfo &);
 
   /// reset - Prepare for a new split.
   void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
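Throughout the SplitKit.cpp hunks above, Edit->get() now hands back a virtual-register number rather than a LiveInterval*, and each access goes through LIS.getInterval(). The point of the indirection is lifetime safety: the container that owns the intervals can grow or reallocate without leaving dangling pointers in the editor. A sketch of the pattern under simplified, illustrative types (IntervalRegistry and this LiveInterval are stand-ins, not the LLVM classes):

    #include <cassert>
    #include <map>

    struct LiveInterval { unsigned reg; /* segments elided */ };

    class IntervalRegistry {
      std::map<unsigned, LiveInterval> Intervals;
    public:
      // Callers store the register number, never a pointer.
      unsigned createEmptyInterval(unsigned Reg) {
        Intervals.emplace(Reg, LiveInterval{Reg});
        return Reg;
      }
      // Look the interval up on demand, as LIS.getInterval() does above.
      LiveInterval &getInterval(unsigned Reg) {
        auto It = Intervals.find(Reg);
        assert(It != Intervals.end() && "unknown virtual register");
        return It->second;
      }
    };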
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
index a789a25..3dbc050 100644
--- a/contrib/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -42,6 +42,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/IR/Function.h"
@@ -169,7 +170,7 @@ private:
   /// slots to use the joint slots.
   void remapInstructions(DenseMap<int, int> &SlotRemap);
 
-  /// The input program may contain intructions which are not inside lifetime
+  /// The input program may contain instructions which are not inside lifetime
   /// markers. This can happen due to a bug in the compiler or due to a bug in
   /// user code (for example, returning a reference to a local variable).
   /// This procedure checks all of the instructions in the function and
@@ -309,9 +310,9 @@ void StackColoring::calculateLocalLiveness() {
 
     SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet;
 
-    for (SmallVector<const MachineBasicBlock*, 8>::iterator
-           PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
-         PI != PE; ++PI) {
+    for (SmallVectorImpl<const MachineBasicBlock *>::iterator
+             PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
+         PI != PE; ++PI) {
       const MachineBasicBlock *BB = *PI;
       if (!BBSet.count(BB)) continue;
@@ -428,17 +429,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
     }
 
     // Create the interval of the blocks that we previously found to be 'alive'.
-    BitVector Alive = BlockLiveness[MBB].LiveIn;
-    Alive |= BlockLiveness[MBB].LiveOut;
-
-    if (Alive.any()) {
-      for (int pos = Alive.find_first(); pos != -1;
-           pos = Alive.find_next(pos)) {
-        if (!Starts[pos].isValid())
-          Starts[pos] = Indexes->getMBBStartIdx(MBB);
-        if (!Finishes[pos].isValid())
-          Finishes[pos] = Indexes->getMBBEndIdx(MBB);
-      }
+    BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB];
+    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
+         pos = MBBLiveness.LiveIn.find_next(pos)) {
+      Starts[pos] = Indexes->getMBBStartIdx(MBB);
+    }
+    for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
+         pos = MBBLiveness.LiveOut.find_next(pos)) {
+      Finishes[pos] = Indexes->getMBBEndIdx(MBB);
     }
 
     for (unsigned i = 0; i < NumSlots; ++i) {
@@ -452,14 +450,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
       SlotIndex F = Finishes[i];
       if (S < F) {
         // We have a single consecutive region.
-        Intervals[i]->addRange(LiveRange(S, F, ValNum));
+        Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
       } else {
         // We have two non consecutive regions. This happens when
         // LIFETIME_START appears after the LIFETIME_END marker.
         SlotIndex NewStart = Indexes->getMBBStartIdx(MBB);
         SlotIndex NewFin = Indexes->getMBBEndIdx(MBB);
-        Intervals[i]->addRange(LiveRange(NewStart, F, ValNum));
-        Intervals[i]->addRange(LiveRange(S, NewFin, ValNum));
+        Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
+        Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
       }
     }
   }
@@ -528,6 +526,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
       if (!V)
         continue;
 
+      const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V);
+      if (PSV && PSV->isConstant(MFI))
+        continue;
+
       // Climb up and find the original alloca.
       V = GetUnderlyingObject(V);
       // If we did not find one, or if the one that we found is not in our
@@ -761,7 +763,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
       // Merge disjoint slots.
       if (!First->overlaps(*Second)) {
         Changed = true;
-        First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
+        First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
         SlotRemap[SecondSlot] = FirstSlot;
         SortedSlots[J] = -1;
         DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
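The calculateLiveIntervals() change above simplifies interval construction: a slot that is live into a block gets its interval opened at the block start, and a slot that is live out gets it closed at the block end. Reduced to plain integers and std::vector<bool> in place of SlotIndexes and BitVector, the per-block step looks roughly like this (a sketch, not the pass itself):

    #include <vector>

    struct BlockLifetime {
      std::vector<bool> LiveIn, LiveOut; // one bit per stack slot
    };

    // Open an interval at the block start for every live-in slot and close
    // it at the block end for every live-out slot, unconditionally, as the
    // rewritten loops above now do.
    void openCloseIntervals(const BlockLifetime &BL, int BlockStart,
                            int BlockEnd, std::vector<int> &Starts,
                            std::vector<int> &Finishes) {
      for (size_t Slot = 0; Slot < BL.LiveIn.size(); ++Slot)
        if (BL.LiveIn[Slot])
          Starts[Slot] = BlockStart;
      for (size_t Slot = 0; Slot < BL.LiveOut.size(); ++Slot)
        if (BL.LiveOut[Slot])
          Finishes[Slot] = BlockEnd;
    }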
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
new file mode 100644
index 0000000..40893ea
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -0,0 +1,314 @@
+//===---------------------------- StackMaps.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackmaps"
+
+#include "llvm/CodeGen/StackMaps.h"
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <iterator>
+
+using namespace llvm;
+
+PatchPointOpers::PatchPointOpers(const MachineInstr *MI):
+  MI(MI),
+  HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+         !MI->getOperand(0).isImplicit()),
+  IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) {
+
+#ifndef NDEBUG
+  {
+  unsigned CheckStartIdx = 0, e = MI->getNumOperands();
+  while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() &&
+         MI->getOperand(CheckStartIdx).isDef() &&
+         !MI->getOperand(CheckStartIdx).isImplicit())
+    ++CheckStartIdx;
+
+  assert(getMetaIdx() == CheckStartIdx &&
+         "Unexpected additional definition in Patchpoint intrinsic.");
+  }
+#endif
+}
+
+unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
+  if (!StartIdx)
+    StartIdx = getVarIdx();
+
+  // Find the next scratch register (implicit def and early clobber)
+  unsigned ScratchIdx = StartIdx, e = MI->getNumOperands();
+  while (ScratchIdx < e &&
+         !(MI->getOperand(ScratchIdx).isReg() &&
+           MI->getOperand(ScratchIdx).isDef() &&
+           MI->getOperand(ScratchIdx).isImplicit() &&
+           MI->getOperand(ScratchIdx).isEarlyClobber()))
+    ++ScratchIdx;
+
+  assert(ScratchIdx != e && "No scratch register available");
+  return ScratchIdx;
+}
+
+void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID,
+                                    MachineInstr::const_mop_iterator MOI,
+                                    MachineInstr::const_mop_iterator MOE,
+                                    bool recordResult) {
+
+  MCContext &OutContext = AP.OutStreamer.getContext();
+  MCSymbol *MILabel = OutContext.CreateTempSymbol();
+  AP.OutStreamer.EmitLabel(MILabel);
+
+  LocationVec CallsiteLocs;
+
+  if (recordResult) {
+    std::pair<Location, MachineInstr::const_mop_iterator> ParseResult =
+      OpParser(MI.operands_begin(), llvm::next(MI.operands_begin()), AP.TM);
+
+    Location &Loc = ParseResult.first;
+    assert(Loc.LocType == Location::Register &&
+           "Stackmap return location must be a register.");
+    CallsiteLocs.push_back(Loc);
+  }
+
+  while (MOI != MOE) {
+    std::pair<Location, MachineInstr::const_mop_iterator> ParseResult =
+      OpParser(MOI, MOE, AP.TM);
+
+    Location &Loc = ParseResult.first;
+
+    // Move large constants into the constant pool.
+    if (Loc.LocType == Location::Constant && (Loc.Offset & ~0xFFFFFFFFULL)) {
+      Loc.LocType = Location::ConstantIndex;
+      Loc.Offset = ConstPool.getConstantIndex(Loc.Offset);
+    }
+
+    CallsiteLocs.push_back(Loc);
+    MOI = ParseResult.second;
+  }
+
+  const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub(
+    MCSymbolRefExpr::Create(MILabel, OutContext),
+    MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext),
+    OutContext);
+
+  CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, CallsiteLocs));
+}
+
+static MachineInstr::const_mop_iterator
+getStackMapEndMOP(MachineInstr::const_mop_iterator MOI,
+                  MachineInstr::const_mop_iterator MOE) {
+  for (; MOI != MOE; ++MOI)
+    if (MOI->isRegMask() || (MOI->isReg() && MOI->isImplicit()))
+      break;
+
+  return MOI;
+}
+
+void StackMaps::recordStackMap(const MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap");
+
+  int64_t ID = MI.getOperand(0).getImm();
+  assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs");
+  recordStackMapOpers(MI, ID, llvm::next(MI.operands_begin(), 2),
+                      getStackMapEndMOP(MI.operands_begin(),
+                                        MI.operands_end()));
+}
+
+void StackMaps::recordPatchPoint(const MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint");
+
+  PatchPointOpers opers(&MI);
+  int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm();
+  assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs");
+  MachineInstr::const_mop_iterator MOI =
+    llvm::next(MI.operands_begin(), opers.getStackMapStartIdx());
+  recordStackMapOpers(MI, ID, MOI, getStackMapEndMOP(MOI, MI.operands_end()),
+                      opers.isAnyReg() && opers.hasDef());
+
+#ifndef NDEBUG
+  // verify anyregcc
+  LocationVec &Locations = CSInfos.back().Locations;
+  if (opers.isAnyReg()) {
+    unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm();
+    for (unsigned i = 0, e = (opers.hasDef() ? NArgs+1 : NArgs); i != e; ++i)
+      assert(Locations[i].LocType == Location::Register &&
+             "anyreg arg must be in reg.");
+  }
+#endif
+}
+
+/// serializeToStackMapSection conceptually populates the following fields:
+///
+/// uint32 : Reserved (header)
+/// uint32 : NumConstants
+/// int64  : Constants[NumConstants]
+/// uint32 : NumRecords
+/// StkMapRecord[NumRecords] {
+///   uint32 : PatchPoint ID
+///   uint32 : Instruction Offset
+///   uint16 : Reserved (record flags)
+///   uint16 : NumLocations
+///   Location[NumLocations] {
+///     uint8  : Register | Direct | Indirect | Constant | ConstantIndex
+///     uint8  : Size in Bytes
+///     uint16 : Dwarf RegNum
+///     int32  : Offset
+///   }
+/// }
+///
+/// Location Encoding, Type, Value:
+///   0x1, Register, Reg                 (value in register)
+///   0x2, Direct, Reg + Offset          (frame index)
+///   0x3, Indirect, [Reg + Offset]      (spilled value)
+///   0x4, Constant, Offset              (small constant)
+///   0x5, ConstIndex, Constants[Offset] (large constant)
+///
+void StackMaps::serializeToStackMapSection() {
+  // Bail out if there's no stack map data.
+  if (CSInfos.empty())
+    return;
+
+  MCContext &OutContext = AP.OutStreamer.getContext();
+  const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
+
+  // Create the section.
+  const MCSection *StackMapSection =
+    OutContext.getObjectFileInfo()->getStackMapSection();
+  AP.OutStreamer.SwitchSection(StackMapSection);
+
+  // Emit a dummy symbol to force section inclusion.
+  AP.OutStreamer.EmitLabel(
+    OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps")));
+
+  // Serialize data.
+  const char *WSMP = "Stack Maps: ";
+  (void)WSMP;
+  const MCRegisterInfo &MCRI = *OutContext.getRegisterInfo();
+
+  DEBUG(dbgs() << "********** Stack Map Output **********\n");
+
+  // Header.
+  AP.OutStreamer.EmitIntValue(0, 4);
+
+  // Num constants.
+  AP.OutStreamer.EmitIntValue(ConstPool.getNumConstants(), 4);
+
+  // Constant pool entries.
+  for (unsigned i = 0; i < ConstPool.getNumConstants(); ++i)
+    AP.OutStreamer.EmitIntValue(ConstPool.getConstant(i), 8);
+
+  DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << "\n");
+  AP.OutStreamer.EmitIntValue(CSInfos.size(), 4);
+
+  for (CallsiteInfoList::const_iterator CSII = CSInfos.begin(),
+                                        CSIE = CSInfos.end();
+       CSII != CSIE; ++CSII) {
+
+    unsigned CallsiteID = CSII->ID;
+    const LocationVec &CSLocs = CSII->Locations;
+
+    DEBUG(dbgs() << WSMP << "callsite " << CallsiteID << "\n");
+
+    // Verify stack map entry. It's better to communicate a problem to the
+    // runtime than crash in case of in-process compilation. Currently, we do
+    // simple overflow checks, but we may eventually communicate other
+    // compilation errors this way.
+    if (CSLocs.size() > UINT16_MAX) {
+      AP.OutStreamer.EmitIntValue(UINT32_MAX, 4); // Invalid ID.
+      AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4);
+      AP.OutStreamer.EmitIntValue(0, 2); // Reserved.
+      AP.OutStreamer.EmitIntValue(0, 2); // 0 locations.
+      continue;
+    }
+
+    AP.OutStreamer.EmitIntValue(CallsiteID, 4);
+    AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4);
+
+    // Reserved for flags.
+    AP.OutStreamer.EmitIntValue(0, 2);
+
+    DEBUG(dbgs() << WSMP << "  has " << CSLocs.size() << " locations\n");
+
+    AP.OutStreamer.EmitIntValue(CSLocs.size(), 2);
+
+    unsigned operIdx = 0;
+    for (LocationVec::const_iterator LocI = CSLocs.begin(), LocE = CSLocs.end();
+         LocI != LocE; ++LocI, ++operIdx) {
+      const Location &Loc = *LocI;
+      DEBUG(
+        dbgs() << WSMP << "  Loc " << operIdx << ": ";
+        switch (Loc.LocType) {
+        case Location::Unprocessed:
+          dbgs() << "<Unprocessed operand>";
+          break;
+        case Location::Register:
+          dbgs() << "Register " << MCRI.getName(Loc.Reg);
+          break;
+        case Location::Direct:
+          dbgs() << "Direct " << MCRI.getName(Loc.Reg);
+          if (Loc.Offset)
+            dbgs() << " + " << Loc.Offset;
+          break;
+        case Location::Indirect:
+          dbgs() << "Indirect " << MCRI.getName(Loc.Reg)
+                 << " + " << Loc.Offset;
+          break;
+        case Location::Constant:
+          dbgs() << "Constant " << Loc.Offset;
+          break;
+        case Location::ConstantIndex:
+          dbgs() << "Constant Index " << Loc.Offset;
+          break;
+        }
+        dbgs() << "\n";
+      );
+
+      unsigned RegNo = 0;
+      int Offset = Loc.Offset;
+      if(Loc.Reg) {
+        RegNo = MCRI.getDwarfRegNum(Loc.Reg, false);
+        for (MCSuperRegIterator SR(Loc.Reg, TRI);
+             SR.isValid() && (int)RegNo < 0; ++SR) {
+          RegNo = TRI->getDwarfRegNum(*SR, false);
+        }
+        // If this is a register location, put the subregister byte offset in
+        // the location offset.
+        if (Loc.LocType == Location::Register) {
+          assert(!Loc.Offset && "Register location should have zero offset");
+          unsigned LLVMRegNo = MCRI.getLLVMRegNum(RegNo, false);
+          unsigned SubRegIdx = MCRI.getSubRegIndex(LLVMRegNo, Loc.Reg);
+          if (SubRegIdx)
+            Offset = MCRI.getSubRegIdxOffset(SubRegIdx);
+        }
+      }
+      else {
+        assert(Loc.LocType != Location::Register &&
+               "Missing location register");
+      }
+      AP.OutStreamer.EmitIntValue(Loc.LocType, 1);
+      AP.OutStreamer.EmitIntValue(Loc.Size, 1);
+      AP.OutStreamer.EmitIntValue(RegNo, 2);
+      AP.OutStreamer.EmitIntValue(Offset, 4);
+    }
+  }
+
+  AP.OutStreamer.AddBlankLine();
+
+  CSInfos.clear();
+}
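The record layout documented above in serializeToStackMapSection() can be made concrete with a small host-side emitter. This sketch writes one StkMapRecord into a byte buffer; host byte order and a precomputed instruction offset are simplifying assumptions (the real pass emits through MCStreamer and encodes the offset as a symbol difference):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct Location {
      uint8_t Type;      // 0x1 Register .. 0x5 ConstantIndex
      uint8_t Size;      // size in bytes
      uint16_t DwarfReg; // Dwarf register number
      int32_t Offset;    // meaning depends on Type
    };

    void emitRecord(std::vector<uint8_t> &Out, uint32_t ID,
                    uint32_t InsnOffset, const std::vector<Location> &Locs) {
      auto put = [&Out](const void *P, size_t N) {
        const uint8_t *B = static_cast<const uint8_t *>(P);
        Out.insert(Out.end(), B, B + N);
      };
      put(&ID, 4);                     // uint32 : PatchPoint ID
      put(&InsnOffset, 4);             // uint32 : Instruction Offset
      uint16_t Reserved = 0, NumLocs = (uint16_t)Locs.size();
      put(&Reserved, 2);               // uint16 : Reserved (record flags)
      put(&NumLocs, 2);                // uint16 : NumLocations
      for (const Location &L : Locs) { // Location[NumLocations]
        put(&L.Type, 1);
        put(&L.Size, 1);
        put(&L.DwarfReg, 2);
        put(&L.Offset, 4);
      }
    }

Each put() call corresponds one-for-one to an EmitIntValue() in the loop above, which is what a runtime consumer of the __LLVM_StackMaps section has to parse back.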
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index fbef347..9020449 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -15,147 +15,120 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "stack-protector"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
+#include <cstdlib>
 using namespace llvm;
 
 STATISTIC(NumFunProtected, "Number of functions protected");
 STATISTIC(NumAddrTaken, "Number of local variables that have their address"
                         " taken.");
 
-namespace {
-  class StackProtector : public FunctionPass {
-    /// TLI - Keep a pointer of a TargetLowering to consult for determining
-    /// target type sizes.
-    const TargetLoweringBase *TLI;
-
-    Function *F;
-    Module *M;
-
-    DominatorTree *DT;
-
-    /// VisitedPHIs - The set of PHI nodes visited when determining
-    /// if a variable's reference has been taken. This set
-    /// is maintained to ensure we don't visit the same PHI node multiple
-    /// times.
-    SmallPtrSet<const PHINode*, 16> VisitedPHIs;
-
-    /// InsertStackProtectors - Insert code into the prologue and epilogue of
-    /// the function.
-    ///
-    ///  - The prologue code loads and stores the stack guard onto the stack.
-    ///  - The epilogue checks the value stored in the prologue against the
-    ///    original value. It calls __stack_chk_fail if they differ.
-    bool InsertStackProtectors();
-
-    /// CreateFailBB - Create a basic block to jump to when the stack protector
-    /// check fails.
-    BasicBlock *CreateFailBB();
-
-    /// ContainsProtectableArray - Check whether the type either is an array or
-    /// contains an array of sufficient size so that we need stack protectors
-    /// for it.
-    bool ContainsProtectableArray(Type *Ty, bool Strong = false,
-                                  bool InStruct = false) const;
-
-    /// \brief Check whether a stack allocation has its address taken.
-    bool HasAddressTaken(const Instruction *AI);
-
-    /// RequiresStackProtector - Check whether or not this function needs a
-    /// stack protector based upon the stack protector level.
-    bool RequiresStackProtector();
-  public:
-    static char ID;             // Pass identification, replacement for typeid.
-    StackProtector() : FunctionPass(ID), TLI(0) {
-      initializeStackProtectorPass(*PassRegistry::getPassRegistry());
-    }
-    StackProtector(const TargetLoweringBase *tli)
-      : FunctionPass(ID), TLI(tli) {
-      initializeStackProtectorPass(*PassRegistry::getPassRegistry());
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addPreserved<DominatorTree>();
-    }
-
-    virtual bool runOnFunction(Function &Fn);
-  };
-} // end anonymous namespace
+static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
+                                          cl::init(true), cl::Hidden);
 
 char StackProtector::ID = 0;
-INITIALIZE_PASS(StackProtector, "stack-protector",
-                "Insert stack protectors", false, false)
+INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors",
+                false, true)
 
-FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) {
-  return new StackProtector(tli);
+FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) {
+  return new StackProtector(TM);
+}
+
+StackProtector::SSPLayoutKind
+StackProtector::getSSPLayout(const AllocaInst *AI) const {
+  return AI ? Layout.lookup(AI) : SSPLK_None;
 }
 
 bool StackProtector::runOnFunction(Function &Fn) {
   F = &Fn;
   M = F->getParent();
   DT = getAnalysisIfAvailable<DominatorTree>();
+  TLI = TM->getTargetLowering();
 
-  if (!RequiresStackProtector()) return false;
+  if (!RequiresStackProtector())
+    return false;
+
+  Attribute Attr = Fn.getAttributes().getAttribute(
+      AttributeSet::FunctionIndex, "stack-protector-buffer-size");
+  if (Attr.isStringAttribute())
+    Attr.getValueAsString().getAsInteger(10, SSPBufferSize);
 
   ++NumFunProtected;
   return InsertStackProtectors();
 }
 
-/// ContainsProtectableArray - Check whether the type either is an array or
-/// contains a char array of sufficient size so that we need stack protectors
-/// for it.
-bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong,
+/// \param [out] IsLarge is set to true if a protectable array is found and
+/// it is "large" ( >= ssp-buffer-size). In the case of a structure with
+/// multiple arrays, this gets set if any of them is large.
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
+                                              bool Strong,
                                               bool InStruct) const {
-  if (!Ty) return false;
+  if (!Ty)
+    return false;
   if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
-    // In strong mode any array, regardless of type and size, triggers a
-    // protector
-    if (Strong)
-      return true;
-    const TargetMachine &TM = TLI->getTargetMachine();
     if (!AT->getElementType()->isIntegerTy(8)) {
-      Triple Trip(TM.getTargetTriple());
-
       // If we're on a non-Darwin platform or we're inside of a structure, don't
       // add stack protectors unless the array is a character array.
-      if (InStruct || !Trip.isOSDarwin())
-          return false;
+      // However, in strong mode any array, regardless of type and size,
+      // triggers a protector.
+      if (!Strong && (InStruct || !Trip.isOSDarwin()))
+        return false;
     }
 
     // If an array has more than SSPBufferSize bytes of allocated space, then we
     // emit stack protectors.
-    if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT))
+    if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) {
+      IsLarge = true;
+      return true;
+    }
+
+    if (Strong)
+      // Require a protector for all arrays in strong mode
       return true;
   }
 
   const StructType *ST = dyn_cast<StructType>(Ty);
-  if (!ST) return false;
+  if (!ST)
+    return false;
 
+  bool NeedsProtector = false;
   for (StructType::element_iterator I = ST->element_begin(),
-         E = ST->element_end(); I != E; ++I)
-    if (ContainsProtectableArray(*I, Strong, true))
-      return true;
+                                    E = ST->element_end();
+       I != E; ++I)
+    if (ContainsProtectableArray(*I, IsLarge, Strong, true)) {
+      // If the element is a protectable array and is large (>= SSPBufferSize)
+      // then we are done. If the protectable array is not large, then
+      // keep looking in case a subsequent element is a large array.
+      if (IsLarge)
+        return true;
+      NeedsProtector = true;
+    }
 
-  return false;
+  return NeedsProtector;
 }
 
 bool StackProtector::HasAddressTaken(const Instruction *AI) {
   for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
-        UI != UE; ++UI) {
+       UI != UE; ++UI) {
     const User *U = *UI;
     if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
       if (AI == SI->getValueOperand())
@@ -202,11 +175,13 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
 /// address taken.
 bool StackProtector::RequiresStackProtector() {
   bool Strong = false;
+  bool NeedsProtector = false;
   if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                      Attribute::StackProtectReq))
-    return true;
-  else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                           Attribute::StackProtectStrong))
+                                      Attribute::StackProtectReq)) {
+    NeedsProtector = true;
+    Strong = true; // Use the same heuristic as strong to determine SSPLayout
+  } else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                             Attribute::StackProtectStrong))
    Strong = true;
   else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                             Attribute::StackProtect))
@@ -215,38 +190,156 @@ bool StackProtector::RequiresStackProtector() {
 
   for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
     BasicBlock *BB = I;
 
-    for (BasicBlock::iterator
-           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;
+         ++II) {
       if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
         if (AI->isArrayAllocation()) {
           // SSP-Strong: Enable protectors for any call to alloca, regardless
           // of size.
           if (Strong)
             return true;
-
+
           if (const ConstantInt *CI =
-               dyn_cast<ConstantInt>(AI->getArraySize())) {
-            unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize;
-            if (CI->getLimitedValue(BufferSize) >= BufferSize)
+                  dyn_cast<ConstantInt>(AI->getArraySize())) {
+            if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
               // A call to alloca with size >= SSPBufferSize requires
               // stack protectors.
-              return true;
-          } else // A call to alloca with a variable size requires protectors.
-            return true;
+              Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+              NeedsProtector = true;
+            } else if (Strong) {
+              // Require protectors for all alloca calls in strong mode.
+              Layout.insert(std::make_pair(AI, SSPLK_SmallArray));
+              NeedsProtector = true;
+            }
+          } else {
+            // A call to alloca with a variable size requires protectors.
+            Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+            NeedsProtector = true;
+          }
+          continue;
         }
 
-        if (ContainsProtectableArray(AI->getAllocatedType(), Strong))
-          return true;
+        bool IsLarge = false;
+        if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
+          Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray
+                                                   : SSPLK_SmallArray));
+          NeedsProtector = true;
+          continue;
+        }
 
         if (Strong && HasAddressTaken(AI)) {
-          ++NumAddrTaken;
-          return true;
+          ++NumAddrTaken;
+          Layout.insert(std::make_pair(AI, SSPLK_AddrOf));
+          NeedsProtector = true;
         }
       }
     }
   }
 
-  return false;
+  return NeedsProtector;
+}
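The rewritten RequiresStackProtector() records a reason for every alloca it protects instead of returning at the first hit. Its decision ladder condenses to roughly the following classification; SSPLayoutKind mirrors the names used above, while the boolean inputs are simplifications of the real IR queries (a sketch, not the pass):

    #include <cstdint>

    enum SSPLayoutKind { SSPLK_None, SSPLK_LargeArray, SSPLK_SmallArray,
                         SSPLK_AddrOf };

    SSPLayoutKind classify(bool VariableSizeAlloca, bool ArrayAlloca,
                           uint64_t ArrayBytes, uint64_t SSPBufferSize,
                           bool Strong, bool AddressTaken) {
      if (VariableSizeAlloca)
        return SSPLK_LargeArray;        // variable alloca: always protected
      if (ArrayAlloca) {
        if (ArrayBytes >= SSPBufferSize)
          return SSPLK_LargeArray;      // big enough on its own
        return Strong ? SSPLK_SmallArray : SSPLK_None;
      }
      if (Strong && AddressTaken)
        return SSPLK_AddrOf;            // strong mode: address-taken locals
      return SSPLK_None;
    }

The recorded kind is what getSSPLayout() later exposes, so the frame layout code can place large arrays closest to the guard.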
+
+static bool InstructionWillNotHaveChain(const Instruction *I) {
+  return !I->mayHaveSideEffects() && !I->mayReadFromMemory() &&
+         isSafeToSpeculativelyExecute(I);
+}
+
+/// Identify if RI has a previous instruction in the "Tail Position" and return
+/// it. Otherwise return 0.
+///
+/// This is based off of the code in llvm::isInTailCallPosition. The difference
+/// is that it inverts the first part of llvm::isInTailCallPosition since
+/// isInTailCallPosition is checking if a call is in a tail call position, and
+/// we are searching for an unknown tail call that might be in the tail call
+/// position. Once we find the call though, the code uses the same refactored
+/// code, returnTypeIsEligibleForTailCall.
+static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI,
+                                       const TargetLoweringBase *TLI) {
+  // Establish a reasonable upper bound on the maximum number of instructions
+  // we will look through to find a tail call.
+  unsigned SearchCounter = 0;
+  const unsigned MaxSearch = 4;
+  bool NoInterposingChain = true;
+
+  for (BasicBlock::reverse_iterator I = llvm::next(BB->rbegin()),
+                                    E = BB->rend();
+       I != E && SearchCounter < MaxSearch; ++I) {
+    Instruction *Inst = &*I;
+
+    // Skip over debug intrinsics and do not allow them to affect our MaxSearch
+    // counter.
+    if (isa<DbgInfoIntrinsic>(Inst))
+      continue;
+
+    // If we find a call and the following conditions are satisfied, then we
+    // have found a tail call that satisfies at least the target independent
+    // requirements of a tail call:
+    //
+    // 1. The call site has the tail marker.
+    //
+    // 2. The call site either will not cause the creation of a chain or if a
+    // chain is necessary there are no instructions in between the callsite and
+    // the call which would create an interposing chain.
+    //
+    // 3. The return type of the function does not impede tail call
+    // optimization.
+    if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
+      if (CI->isTailCall() &&
+          (InstructionWillNotHaveChain(CI) || NoInterposingChain) &&
+          returnTypeIsEligibleForTailCall(BB->getParent(), CI, RI, *TLI))
+        return CI;
+    }
+
+    // If we did not find a call see if we have an instruction that may create
+    // an interposing chain.
+    NoInterposingChain =
+        NoInterposingChain && InstructionWillNotHaveChain(Inst);
+
+    // Increment max search.
+    SearchCounter++;
+  }
+
+  return 0;
+}
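FindPotentialTailCall() above is a bounded backwards walk from the return instruction. The shape of that scan, on a toy instruction type (Inst and findTailCall are illustrative; the real code additionally checks returnTypeIsEligibleForTailCall()):

    #include <vector>

    struct Inst {
      bool IsDebug = false, IsTailCall = false, HasSideEffects = false;
    };

    // BB.back() is assumed to be the return; scan at most MaxSearch
    // instructions above it, skipping debug values, and stop trusting the
    // chain once a side-effecting instruction interposes.
    const Inst *findTailCall(const std::vector<Inst> &BB) {
      const unsigned MaxSearch = 4;
      unsigned Searched = 0;
      bool NoInterposingChain = true;
      if (BB.size() < 2)
        return 0;
      for (size_t i = BB.size() - 1; i-- > 0 && Searched < MaxSearch;) {
        const Inst &I = BB[i];
        if (I.IsDebug)
          continue; // debug values do not count against MaxSearch
        if (I.IsTailCall && NoInterposingChain)
          return &I;
        NoInterposingChain = NoInterposingChain && !I.HasSideEffects;
        ++Searched;
      }
      return 0;
    }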
+
+/// Insert code into the entry block that stores the __stack_chk_guard
+/// variable onto the stack:
+///
+///   entry:
+///     StackGuardSlot = alloca i8*
+///     StackGuard = load __stack_chk_guard
+///     call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
+///
+/// Returns true if the platform/triple supports the stackprotectorcreate
+/// pseudo node.
+static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
+                           const TargetLoweringBase *TLI, const Triple &Trip,
+                           AllocaInst *&AI, Value *&StackGuardVar) {
+  bool SupportsSelectionDAGSP = false;
+  PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+  unsigned AddressSpace, Offset;
+  if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
+    Constant *OffsetVal =
+        ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
+
+    StackGuardVar = ConstantExpr::getIntToPtr(
+        OffsetVal, PointerType::get(PtrTy, AddressSpace));
+  } else if (Trip.getOS() == llvm::Triple::OpenBSD) {
+    StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy);
+    cast<GlobalValue>(StackGuardVar)
+        ->setVisibility(GlobalValue::HiddenVisibility);
+  } else {
+    SupportsSelectionDAGSP = true;
+    StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+  }
+
+  IRBuilder<> B(&F->getEntryBlock().front());
+  AI = B.CreateAlloca(PtrTy, 0, "StackGuardSlot");
+  LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard");
+  B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), LI,
+                AI);
+
+  return SupportsSelectionDAGSP;
 }
 
 /// InsertStackProtectors - Insert code into the prologue and epilogue of the
@@ -256,102 +349,102 @@ bool StackProtector::RequiresStackProtector() {
 /// - The epilogue checks the value stored in the prologue against the original
 ///   value. It calls __stack_chk_fail if they differ.
 bool StackProtector::InsertStackProtectors() {
-  BasicBlock *FailBB = 0;       // The basic block to jump to if check fails.
-  BasicBlock *FailBBDom = 0;    // FailBB's dominator.
-  AllocaInst *AI = 0;           // Place on stack that stores the stack guard.
-  Value *StackGuardVar = 0;     // The stack guard variable.
+  bool HasPrologue = false;
+  bool SupportsSelectionDAGSP =
+      EnableSelectionDAGSP && !TM->Options.EnableFastISel;
+  AllocaInst *AI = 0;           // Place on stack that stores the stack guard.
+  Value *StackGuardVar = 0;     // The stack guard variable.
 
-  for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
+  for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
     BasicBlock *BB = I++;
     ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
-    if (!RI) continue;
+    if (!RI)
+      continue;
 
-    if (!FailBB) {
-      // Insert code into the entry block that stores the __stack_chk_guard
-      // variable onto the stack:
-      //
-      //   entry:
-      //     StackGuardSlot = alloca i8*
-      //     StackGuard = load __stack_chk_guard
-      //     call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
-      //
-      PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
-      unsigned AddressSpace, Offset;
-      if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
-        Constant *OffsetVal =
-          ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
-
-        StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
-                                      PointerType::get(PtrTy, AddressSpace));
+    if (!HasPrologue) {
+      HasPrologue = true;
+      SupportsSelectionDAGSP &=
+          CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar);
+    }
+
+    if (SupportsSelectionDAGSP) {
+      // Since we have a potential tail call, insert the special stack check
+      // intrinsic.
+      Instruction *InsertionPt = 0;
+      if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) {
+        InsertionPt = CI;
       } else {
-        StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+        InsertionPt = RI;
+        // At this point we know that BB has a return statement so it *DOES*
+        // have a terminator.
+        assert(InsertionPt != 0 && "BB must have a terminator instruction at "
+                                   "this point.");
      }
 
-      BasicBlock &Entry = F->getEntryBlock();
-      Instruction *InsPt = &Entry.front();
-
-      AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
-      LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);
+      Function *Intrinsic =
+          Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck);
+      CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt);
 
-      Value *Args[] = { LI, AI };
-      CallInst::
-        Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
-               Args, "", InsPt);
-
-      // Create the basic block to jump to when the guard check fails.
-      FailBB = CreateFailBB();
-    }
+    } else {
+      // If we do not support SelectionDAG based tail calls, generate IR level
+      // tail calls.
+      //
+      // For each block with a return instruction, convert this:
+      //
+      //   return:
+      //     ...
+      //     ret ...
+      //
+      // into this:
+      //
+      //   return:
+      //     ...
+      //     %1 = load __stack_chk_guard
+      //     %2 = load StackGuardSlot
+      //     %3 = cmp i1 %1, %2
+      //     br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+      //
+      //   SP_return:
+      //     ret ...
+      //
+      //   CallStackCheckFailBlk:
+      //     call void @__stack_chk_fail()
+      //     unreachable
+
+      // Create the FailBB. We duplicate the BB every time since the MI tail
+      // merge pass will merge together all of the various BB into one including
+      // fail BB generated by the stack protector pseudo instruction.
+      BasicBlock *FailBB = CreateFailBB();
+
+      // Split the basic block before the return instruction.
+      BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+
+      // Update the dominator tree if we need to.
+      if (DT && DT->isReachableFromEntry(BB)) {
+        DT->addNewBlock(NewBB, BB);
+        DT->addNewBlock(FailBB, BB);
+      }
 
-    // For each block with a return instruction, convert this:
-    //
-    //   return:
-    //     ...
-    //     ret ...
-    //
-    // into this:
-    //
-    //   return:
-    //     ...
-    //     %1 = load __stack_chk_guard
-    //     %2 = load StackGuardSlot
-    //     %3 = cmp i1 %1, %2
-    //     br i1 %3, label %SP_return, label %CallStackCheckFailBlk
-    //
-    //   SP_return:
-    //     ret ...
-    //
-    //   CallStackCheckFailBlk:
-    //     call void @__stack_chk_fail()
-    //     unreachable
+      // Remove default branch instruction to the new BB.
+      BB->getTerminator()->eraseFromParent();
 
-    // Split the basic block before the return instruction.
-    BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+      // Move the newly created basic block to the point right after the old
+      // basic block so that it's in the "fall through" position.
+      NewBB->moveAfter(BB);
 
-    if (DT && DT->isReachableFromEntry(BB)) {
-      DT->addNewBlock(NewBB, BB);
-      FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB;
+      // Generate the stack protector instructions in the old basic block.
+      IRBuilder<> B(BB);
+      LoadInst *LI1 = B.CreateLoad(StackGuardVar);
+      LoadInst *LI2 = B.CreateLoad(AI);
+      Value *Cmp = B.CreateICmpEQ(LI1, LI2);
+      B.CreateCondBr(Cmp, NewBB, FailBB);
     }
-
-    // Remove default branch instruction to the new BB.
-    BB->getTerminator()->eraseFromParent();
-
-    // Move the newly created basic block to the point right after the old basic
-    // block so that it's in the "fall through" position.
-    NewBB->moveAfter(BB);
-
-    // Generate the stack protector instructions in the old basic block.
-    LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
-    LoadInst *LI2 = new LoadInst(AI, "", true, BB);
-    ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, "");
-    BranchInst::Create(NewBB, FailBB, Cmp, BB);
   }
 
   // Return if we didn't modify any basic blocks. I.e., there are no return
  // statements in the function.
-  if (!FailBB) return false;
-
-  if (DT && FailBBDom)
-    DT->addNewBlock(FailBB, FailBBDom);
+  if (!HasPrologue)
+    return false;
 
   return true;
 }
@@ -359,12 +452,20 @@ bool StackProtector::InsertStackProtectors() {
 /// CreateFailBB - Create a basic block to jump to when the stack protector
 /// check fails.
 BasicBlock *StackProtector::CreateFailBB() {
-  BasicBlock *FailBB = BasicBlock::Create(F->getContext(),
-                                          "CallStackCheckFailBlk", F);
-  Constant *StackChkFail =
-    M->getOrInsertFunction("__stack_chk_fail",
-                           Type::getVoidTy(F->getContext()), NULL);
-  CallInst::Create(StackChkFail, "", FailBB);
-  new UnreachableInst(F->getContext(), FailBB);
+  LLVMContext &Context = F->getContext();
+  BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F);
+  IRBuilder<> B(FailBB);
+  if (Trip.getOS() == llvm::Triple::OpenBSD) {
+    Constant *StackChkFail = M->getOrInsertFunction(
+        "__stack_smash_handler", Type::getVoidTy(Context),
+        Type::getInt8PtrTy(Context), NULL);
+
+    B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
+  } else {
+    Constant *StackChkFail = M->getOrInsertFunction(
+        "__stack_chk_fail", Type::getVoidTy(Context), NULL);
+    B.CreateCall(StackChkFail);
+  }
+  B.CreateUnreachable();
   return FailBB;
 }
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index f951561..9f44df8 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -14,20 +14,20 @@
 #define DEBUG_TYPE "stackslotcoloring"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include <vector>
@@ -48,13 +48,16 @@ namespace {
     LiveStacks* LS;
     MachineFrameInfo *MFI;
     const TargetInstrInfo *TII;
-    const MachineLoopInfo *loopInfo;
+    const MachineBlockFrequencyInfo *MBFI;
 
     // SSIntervals - Spill slot intervals.
     std::vector<LiveInterval*> SSIntervals;
 
-    // SSRefs - Keep a list of frame index references for each spill slot.
-    SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+    // SSRefs - Keep a list of MachineMemOperands for each spill slot.
+    // MachineMemOperands can be shared between instructions, so we need
+    // to be careful that renames like [FI0, FI1] -> [FI1, FI2] do not
+    // become FI0 -> FI1 -> FI2.
+    SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs;
 
     // OrigAlignments - Alignments of stack objects before coloring.
     SmallVector<unsigned, 16> OrigAlignments;
@@ -89,8 +92,8 @@ namespace {
       AU.addRequired<SlotIndexes>();
       AU.addPreserved<SlotIndexes>();
       AU.addRequired<LiveStacks>();
-      AU.addRequired<MachineLoopInfo>();
-      AU.addPreserved<MachineLoopInfo>();
+      AU.addRequired<MachineBlockFrequencyInfo>();
+      AU.addPreserved<MachineBlockFrequencyInfo>();
       AU.addPreservedID(MachineDominatorsID);
       MachineFunctionPass::getAnalysisUsage(AU);
     }
@@ -103,7 +106,7 @@ namespace {
     bool OverlapWithAssignments(LiveInterval *li, int Color) const;
     int ColorSlot(LiveInterval *li);
     bool ColorSlots(MachineFunction &MF);
-    void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+    void RewriteInstruction(MachineInstr *MI, SmallVectorImpl<int> &SlotMapping,
                             MachineFunction &MF);
     bool RemoveDeadStores(MachineBasicBlock* MBB);
   };
@@ -139,7 +142,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
   for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
        MBBI != E; ++MBBI) {
     MachineBasicBlock *MBB = &*MBBI;
-    unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+    BlockFrequency Freq = MBFI->getBlockFreq(MBB);
     for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
          MII != EE; ++MII) {
       MachineInstr *MI = &*MII;
@@ -154,8 +157,19 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
           continue;
         LiveInterval &li = LS->getInterval(FI);
         if (!MI->isDebugValue())
-          li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
-        SSRefs[FI].push_back(MI);
+          li.weight += LiveIntervals::getSpillWeight(false, true, Freq);
+      }
+      for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(),
+           EE = MI->memoperands_end(); MMOI != EE; ++MMOI) {
+        MachineMemOperand *MMO = *MMOI;
+        if (const Value *V = MMO->getValue()) {
+          if (const FixedStackPseudoSourceValue *FSV =
+              dyn_cast<FixedStackPseudoSourceValue>(V)) {
+            int FI = FSV->getFrameIndex();
+            if (FI >= 0)
+              SSRefs[FI].push_back(MMO);
+          }
+        }
       }
     }
   }
@@ -197,7 +211,7 @@ void StackSlotColoring::InitializeSlots() {
 /// LiveIntervals that have already been assigned to the specified color.
 bool
 StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
-  const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color];
+  const SmallVectorImpl<LiveInterval *> &OtherLIs = Assignments[Color];
   for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
     LiveInterval *OtherLI = OtherLIs[i];
     if (OtherLI->overlaps(*li))
@@ -291,16 +305,26 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
   if (!Changed)
     return false;
 
-  // Rewrite all MO_FrameIndex operands.
-  SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+  // Rewrite all MachineMemOperands.
   for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
     int NewFI = SlotMapping[SS];
     if (NewFI == -1 || (NewFI == (int)SS))
       continue;
 
-    SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
-    for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
-      RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+    const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+    SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS];
+    for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i)
+      RefMMOs[i]->setValue(NewSV);
+  }
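On the weight change in ScanForSpillSlotRefs: a hedged sketch contrasting the two heuristics. The exact scaling inside LiveIntervals::getSpillWeight differs from this rendering; only the shape of the change matters here.

    #include <cmath>

    // Old heuristic: a static guess that each loop level multiplies the
    // execution count by a constant (roughly pow(10, depth) in this era).
    float weightByLoopDepth(bool IsDef, bool IsUse, unsigned LoopDepth) {
      return (IsDef + IsUse) * std::pow(10.0f, (float)LoopDepth);
    }

    // New heuristic: scale by the block's estimated frequency relative to
    // the entry block, which also accounts for branch probabilities.
    float weightByBlockFreq(bool IsDef, bool IsUse, double BlockFreq,
                            double EntryFreq) {
      return (float)((IsDef + IsUse) * (BlockFreq / EntryFreq));
    }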
+  // Rewrite all MO_FrameIndex operands.  Look for dead stores.
+  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+       MBBI != E; ++MBBI) {
+    MachineBasicBlock *MBB = &*MBBI;
+    for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+         MII != EE; ++MII)
+      RewriteInstruction(MII, SlotMapping, MF);
+    RemoveDeadStores(MBB);
   }
 
   // Delete unused stack slots.
@@ -315,28 +339,24 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
 
 /// RewriteInstruction - Rewrite specified instruction by replacing references
 /// to old frame index with new one.
-void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
-                                           int NewFI, MachineFunction &MF) {
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI,
+                                           SmallVectorImpl<int> &SlotMapping,
+                                           MachineFunction &MF) {
   // Update the operands.
   for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
     MachineOperand &MO = MI->getOperand(i);
     if (!MO.isFI())
       continue;
-    int FI = MO.getIndex();
-    if (FI != OldFI)
+    int OldFI = MO.getIndex();
+    if (OldFI < 0)
+      continue;
+    int NewFI = SlotMapping[OldFI];
+    if (NewFI == -1 || NewFI == OldFI)
      continue;
     MO.setIndex(NewFI);
   }
 
-  // Update the memory references. This changes the MachineMemOperands
-  // directly. They may be in use by multiple instructions, however all
-  // instructions using OldFI are being rewritten to use NewFI.
-  const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
-  const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI);
-  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
-       E = MI->memoperands_end(); I != E; ++I)
-    if ((*I)->getValue() == OldSV)
-      (*I)->setValue(NewSV);
+  // The MachineMemOperands have already been updated.
 }
 
@@ -357,10 +377,19 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
     if (DCELimit != -1 && (int)NumDead >= DCELimit)
       break;
 
+    int FirstSS, SecondSS;
+    if (TII->isStackSlotCopy(I, FirstSS, SecondSS) &&
+        FirstSS == SecondSS &&
+        FirstSS != -1) {
+      ++NumDead;
+      changed = true;
+      toErase.push_back(I);
+      continue;
+    }
+
     MachineBasicBlock::iterator NextMI = llvm::next(I);
     if (NextMI == MBB->end()) continue;
 
-    int FirstSS, SecondSS;
     unsigned LoadReg = 0;
     unsigned StoreReg = 0;
     if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
@@ -379,7 +408,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
     ++I;
   }
 
-  for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
+  for (SmallVectorImpl<MachineInstr *>::iterator I = toErase.begin(),
        E = toErase.end(); I != E; ++I)
     (*I)->eraseFromParent();
 
@@ -396,7 +425,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
   MFI = MF.getFrameInfo();
   TII = MF.getTarget().getInstrInfo();
   LS = &getAnalysis<LiveStacks>();
-  loopInfo = &getAnalysis<MachineLoopInfo>();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
 
   bool Changed = false;
 
@@ -430,10 +459,5 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
     Assignments[i].clear();
   Assignments.clear();
 
-  if (Changed) {
-    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
-      Changed |= RemoveDeadStores(I);
-  }
-
   return Changed;
 }
diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
deleted file mode 100644
index b337c53..0000000
--- a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
+++ /dev/null
@@ -1,825 +0,0 @@
-//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass eliminates PHI instructions by aggressively coalescing the copies
-// that would be inserted by a naive algorithm and only inserting the copies
-// that are necessary. The coalescing technique initially assumes that all
-// registers appearing in a PHI instruction do not interfere. It then eliminates
-// proven interferences, using dominators to only perform a linear number of
-// interference tests instead of the quadratic number of interference tests
-// that this would naively require. This is a technique derived from:
-//
-//    Budimlic, et al. Fast copy coalescing and live-range identification.
-//    In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
-//    Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
-//    PLDI '02. ACM, New York, NY, 25-32.
-//
-// The original implementation constructs a data structure they call a dominance
-// forest for this purpose. The dominance forest was shown to be unnecessary,
-// as it is possible to emulate the creation and traversal of a dominance forest
-// by directly using the dominator tree, rather than actually constructing the
-// dominance forest. This technique is explained in:
-//
-//   Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
-//     Quality and Efficiency,
-//   In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
-//   Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
-//   CGO '09. IEEE, Washington, DC, 114-125.
-//
-// Careful implementation allows for all of the dominator forest interference
-// checks to be performed at once in a single depth-first traversal of the
-// dominator tree, which is what is implemented here.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "strongphielim"
-#include "llvm/CodeGen/Passes.h"
-#include "PHIEliminationUtils.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
-using namespace llvm;
-
-namespace {
-  class StrongPHIElimination : public MachineFunctionPass {
-  public:
-    static char ID; // Pass identification, replacement for typeid
-    StrongPHIElimination() : MachineFunctionPass(ID) {
-      initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage&) const;
-    bool runOnMachineFunction(MachineFunction&);
-
-  private:
-    /// This struct represents a single node in the union-find data structure
-    /// representing the variable congruence classes. There is one difference
-    /// from a normal union-find data structure. We steal two bits from the parent
-    /// pointer . One of these bits is used to represent whether the register
-    /// itself has been isolated, and the other is used to represent whether the
-    /// PHI with that register as its destination has been isolated.
-    ///
-    /// Note that this leads to the strange situation where the leader of a
-    /// congruence class may no longer logically be a member, due to being
-    /// isolated.
-    struct Node {
-      enum Flags {
-        kRegisterIsolatedFlag = 1,
-        kPHIIsolatedFlag = 2
-      };
-      Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
-
-      Node *getLeader();
-
-      PointerIntPair<Node*, 2> parent;
-      unsigned value;
-      unsigned rank;
-    };
-
-    /// Add a register in a new congruence class containing only itself.
-    void addReg(unsigned);
-
-    /// Join the congruence classes of two registers. This function is biased
-    /// towards the left argument, i.e. after
-    ///
-    /// addReg(r2);
-    /// unionRegs(r1, r2);
-    ///
-    /// the leader of the unioned congruence class is the same as the leader of
-    /// r1's congruence class prior to the union. This is actually relied upon
-    /// in the copy insertion code.
-    void unionRegs(unsigned, unsigned);
-
-    /// Get the color of a register. The color is 0 if the register has been
-    /// isolated.
-    unsigned getRegColor(unsigned);
-
-    // Isolate a register.
-    void isolateReg(unsigned);
-
-    /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
-    /// isolated. Otherwise, it is the original color of its destination and
-    /// all of its operands (before they were isolated, if they were).
-    unsigned getPHIColor(MachineInstr*);
-
-    /// Isolate a PHI.
-    void isolatePHI(MachineInstr*);
-
-    /// Traverses a basic block, splitting any interferences found between
-    /// registers in the same congruence class. It takes two DenseMaps as
-    /// arguments that it also updates: CurrentDominatingParent, which maps
-    /// a color to the register in that congruence class whose definition was
-    /// most recently seen, and ImmediateDominatingParent, which maps a register
-    /// to the register in the same congruence class that most immediately
-    /// dominates it.
-    ///
-    /// This function assumes that it is being called in a depth-first traversal
-    /// of the dominator tree.
-    void SplitInterferencesForBasicBlock(
-      MachineBasicBlock&,
-      DenseMap<unsigned, unsigned> &CurrentDominatingParent,
-      DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
-
-    // Lowers a PHI instruction, inserting copies of the source and destination
-    // registers as necessary.
-    void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
-
-    // Merges the live interval of Reg into NewReg and renames Reg to NewReg
-    // everywhere that Reg appears. Requires Reg and NewReg to have non-
-    // overlapping lifetimes.
-    void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
-
-    MachineRegisterInfo *MRI;
-    const TargetInstrInfo *TII;
-    MachineDominatorTree *DT;
-    LiveIntervals *LI;
-
-    BumpPtrAllocator Allocator;
-
-    DenseMap<unsigned, Node*> RegNodeMap;
-
-    // Maps a basic block to a list of its defs of registers that appear as PHI
-    // sources.
-    DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
-
-    // Maps a color to a pair of a MachineInstr* and a virtual register, which
-    // is the operand of that PHI corresponding to the current basic block.
-    DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
-
-    // FIXME: Can these two data structures be combined? Would a std::multimap
-    // be any better?
-
-    // Stores pairs of predecessor basic blocks and the source registers of
-    // inserted copy instructions.
-    typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
-    SrcCopySet InsertedSrcCopySet;
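The Node struct above is the pass's one data-structure trick: a union-find whose parent pointer spares its two low bits for the "register isolated" and "PHI isolated" flags. A self-contained sketch of the same idea using plain uintptr_t tagging instead of LLVM's PointerIntPair; all names here are illustrative:

    #include <cstdint>

    // Alignment of at least 4 guarantees the two low pointer bits are free.
    struct UFNode {
      uintptr_t ParentAndFlags; // low 2 bits: isolation flags; rest: parent
      unsigned Value, Rank;

      enum { kRegIsolated = 1, kPHIIsolated = 2 };

      explicit UFNode(unsigned V) : Value(V), Rank(0) {
        ParentAndFlags = reinterpret_cast<uintptr_t>(this); // self-parented
      }
      UFNode *parent() const {
        return reinterpret_cast<UFNode *>(ParentAndFlags & ~uintptr_t(3));
      }
      void setParent(UFNode *P) {
        ParentAndFlags = reinterpret_cast<uintptr_t>(P) | (ParentAndFlags & 3);
      }
      unsigned flags() const { return unsigned(ParentAndFlags & 3); }

      UFNode *leader() { // find, with path compression
        UFNode *L = this;
        while (L->parent() != L)
          L = L->parent();
        for (UFNode *N = this; N != L;) {
          UFNode *Next = N->parent();
          N->setParent(L);
          N = Next;
        }
        return L;
      }
    };

    // Union biased toward the left argument, as unionRegs documents: the
    // merged class keeps A's leader (trading away strict union-by-rank).
    inline void unionNodes(UFNode *A, UFNode *B) {
      UFNode *LA = A->leader(), *LB = B->leader();
      if (LA == LB)
        return;
      LB->setParent(LA);
      if (LA->Rank <= LB->Rank)
        LA->Rank = LB->Rank + 1;
    }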
-
-    // Maps pairs of predecessor basic blocks and colors to their defining copy
-    // instructions.
-    typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
-      SrcCopyMap;
-    SrcCopyMap InsertedSrcCopyMap;
-
-    // Maps inserted destination copy registers to their defining copy
-    // instructions.
-    typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
-    DestCopyMap InsertedDestCopies;
-  };
-
-  struct MIIndexCompare {
-    MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
-
-    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
-      return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
-    }
-
-    LiveIntervals *LI;
-  };
-} // namespace
-
-STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
-STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
-STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
-
-char StrongPHIElimination::ID = 0;
-INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
-  "Eliminate PHI nodes for register allocation, intelligently", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
-  "Eliminate PHI nodes for register allocation, intelligently", false, false)
-
-char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
-
-void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesCFG();
-  AU.addRequired<MachineDominatorTree>();
-  AU.addRequired<SlotIndexes>();
-  AU.addPreserved<SlotIndexes>();
-  AU.addRequired<LiveIntervals>();
-  AU.addPreserved<LiveIntervals>();
-  MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
-  // FIXME: This only needs to check from the first terminator, as only the
-  // first terminator can use a virtual register.
-  for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
-    assert (RI != MBB->rend());
-    MachineInstr *MI = &*RI;
-
-    for (MachineInstr::mop_iterator OI = MI->operands_begin(),
-         OE = MI->operands_end(); OI != OE; ++OI) {
-      MachineOperand &MO = *OI;
-      if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
-        return &MO;
-    }
-  }
-}
-
-bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
-  MRI = &MF.getRegInfo();
-  TII = MF.getTarget().getInstrInfo();
-  DT = &getAnalysis<MachineDominatorTree>();
-  LI = &getAnalysis<LiveIntervals>();
-
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
-       I != E; ++I) {
-    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
-         BBI != BBE && BBI->isPHI(); ++BBI) {
-      unsigned DestReg = BBI->getOperand(0).getReg();
-      addReg(DestReg);
-      PHISrcDefs[I].push_back(BBI);
-
-      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
-        MachineOperand &SrcMO = BBI->getOperand(i);
-        unsigned SrcReg = SrcMO.getReg();
-        addReg(SrcReg);
-        unionRegs(DestReg, SrcReg);
-
-        MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
-        if (DefMI)
-          PHISrcDefs[DefMI->getParent()].push_back(DefMI);
-      }
-    }
-  }
-
-  // Perform a depth-first traversal of the dominator tree, splitting
-  // interferences amongst PHI-congruence classes.
-  DenseMap<unsigned, unsigned> CurrentDominatingParent;
-  DenseMap<unsigned, unsigned> ImmediateDominatingParent;
-  for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
-       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
-    SplitInterferencesForBasicBlock(*DI->getBlock(),
-                                    CurrentDominatingParent,
-                                    ImmediateDominatingParent);
-  }
-
-  // Insert copies for all PHI source and destination registers.
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
-       I != E; ++I) {
-    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
-         BBI != BBE && BBI->isPHI(); ++BBI) {
-      InsertCopiesForPHI(BBI, I);
-    }
-  }
-
-  // FIXME: Preserve the equivalence classes during copy insertion and use
-  // the preversed equivalence classes instead of recomputing them.
-  RegNodeMap.clear();
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
-       I != E; ++I) {
-    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
-         BBI != BBE && BBI->isPHI(); ++BBI) {
-      unsigned DestReg = BBI->getOperand(0).getReg();
-      addReg(DestReg);
-
-      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
-        unsigned SrcReg = BBI->getOperand(i).getReg();
-        addReg(SrcReg);
-        unionRegs(DestReg, SrcReg);
-      }
-    }
-  }
-
-  DenseMap<unsigned, unsigned> RegRenamingMap;
-  bool Changed = false;
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
-       I != E; ++I) {
-    MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
-    while (BBI != BBE && BBI->isPHI()) {
-      MachineInstr *PHI = BBI;
-
-      assert(PHI->getNumOperands() > 0);
-
-      unsigned SrcReg = PHI->getOperand(1).getReg();
-      unsigned SrcColor = getRegColor(SrcReg);
-      unsigned NewReg = RegRenamingMap[SrcColor];
-      if (!NewReg) {
-        NewReg = SrcReg;
-        RegRenamingMap[SrcColor] = SrcReg;
-      }
-      MergeLIsAndRename(SrcReg, NewReg);
-
-      unsigned DestReg = PHI->getOperand(0).getReg();
-      if (!InsertedDestCopies.count(DestReg))
-        MergeLIsAndRename(DestReg, NewReg);
-
-      for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
-        unsigned SrcReg = PHI->getOperand(i).getReg();
-        MergeLIsAndRename(SrcReg, NewReg);
-      }
-
-      ++BBI;
-      LI->RemoveMachineInstrFromMaps(PHI);
-      PHI->eraseFromParent();
-      Changed = true;
-    }
-  }
-
-  // Due to the insertion of copies to split live ranges, the live intervals are
-  // guaranteed to not overlap, except in one case: an original PHI source and a
-  // PHI destination copy. In this case, they have the same value and thus don't
-  // truly intersect, so we merge them into the value live at that point.
-  // FIXME: Is there some better way we can handle this?
-  for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
-       E = InsertedDestCopies.end(); I != E; ++I) {
-    unsigned DestReg = I->first;
-    unsigned DestColor = getRegColor(DestReg);
-    unsigned NewReg = RegRenamingMap[DestColor];
-
-    LiveInterval &DestLI = LI->getInterval(DestReg);
-    LiveInterval &NewLI = LI->getInterval(NewReg);
-
-    assert(DestLI.ranges.size() == 1
-           && "PHI destination copy's live interval should be a single live "
-              "range from the beginning of the BB to the copy instruction.");
-    LiveRange *DestLR = DestLI.begin();
-    VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
-    if (!NewVNI) {
-      NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
-      MachineInstr *CopyInstr = I->second;
-      CopyInstr->getOperand(1).setIsKill(true);
-    }
-
-    LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
-    NewLI.addRange(NewLR);
-
-    LI->removeInterval(DestReg);
-    MRI->replaceRegWith(DestReg, NewReg);
-  }
-
-  // Adjust the live intervals of all PHI source registers to handle the case
-  // where the PHIs in successor blocks were the only later uses of the source
-  // register.
-  for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
-       E = InsertedSrcCopySet.end(); I != E; ++I) {
-    MachineBasicBlock *MBB = I->first;
-    unsigned SrcReg = I->second;
-    if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
-      SrcReg = RenamedRegister;
-
-    LiveInterval &SrcLI = LI->getInterval(SrcReg);
-
-    bool isLiveOut = false;
-    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
-         SE = MBB->succ_end(); SI != SE; ++SI) {
-      if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
-        isLiveOut = true;
-        break;
-      }
-    }
-
-    if (isLiveOut)
-      continue;
-
-    MachineOperand *LastUse = findLastUse(MBB, SrcReg);
-    assert(LastUse);
-    SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
-    SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB));
-    LastUse->setIsKill(true);
-  }
-
-  Allocator.Reset();
-  RegNodeMap.clear();
-  PHISrcDefs.clear();
-  InsertedSrcCopySet.clear();
-  InsertedSrcCopyMap.clear();
-  InsertedDestCopies.clear();
-
-  return Changed;
-}
-
-void StrongPHIElimination::addReg(unsigned Reg) {
-  Node *&N = RegNodeMap[Reg];
-  if (!N)
-    N = new (Allocator) Node(Reg);
-}
-
-StrongPHIElimination::Node*
-StrongPHIElimination::Node::getLeader() {
-  Node *N = this;
-  Node *Parent = parent.getPointer();
-  Node *Grandparent = Parent->parent.getPointer();
-
-  while (Parent != Grandparent) {
-    N->parent.setPointer(Grandparent);
-    N = Grandparent;
-    Parent = Parent->parent.getPointer();
-    Grandparent = Parent->parent.getPointer();
-  }
-
-  return Parent;
-}
-
-unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
-  DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
-  if (RI == RegNodeMap.end())
-    return 0;
-  Node *Node = RI->second;
-  if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
-    return 0;
-  return Node->getLeader()->value;
-}
-
-void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
-  Node *Node1 = RegNodeMap[Reg1]->getLeader();
-  Node *Node2 = RegNodeMap[Reg2]->getLeader();
-
-  if (Node1->rank > Node2->rank) {
-    Node2->parent.setPointer(Node1->getLeader());
-  } else if (Node1->rank < Node2->rank) {
-    Node1->parent.setPointer(Node2->getLeader());
-  } else if (Node1 != Node2) {
-    Node2->parent.setPointer(Node1->getLeader());
-    Node1->rank++;
-  }
-}
-
-void StrongPHIElimination::isolateReg(unsigned Reg) {
-  Node *Node = RegNodeMap[Reg];
-  Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
-}
-
-unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
-  assert(PHI->isPHI());
-
-  unsigned DestReg = PHI->getOperand(0).getReg();
-  Node *DestNode = RegNodeMap[DestReg];
-  if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
-    return 0;
-
-  for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
-    unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
-    if (SrcColor)
-      return SrcColor;
-  }
-  return 0;
-}
-
-void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
-  assert(PHI->isPHI());
-  Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
-  Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
-}
-
-/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
-/// interferences found between registers in the same congruence class. It
-/// takes two DenseMaps as arguments that it also updates:
-///
-/// 1) CurrentDominatingParent, which maps a color to the register in that
-///    congruence class whose definition was most recently seen.
-///
-/// 2) ImmediateDominatingParent, which maps a register to the register in the
-///    same congruence class that most immediately dominates it.
-///
-/// This function assumes that it is being called in a depth-first traversal
-/// of the dominator tree.
-///
-/// The algorithm used here is a generalization of the dominance-based SSA test
-/// for two variables. If there are variables a_1, ..., a_n such that
-///
-///   def(a_1) dom ... dom def(a_n),
-///
-/// then we can test for an interference between any two a_i by only using O(n)
-/// interference tests between pairs of variables. If i < j and a_i and a_j
-/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
-/// Thus, in order to test for an interference involving a_i, we need only check
-/// for a potential interference with a_i+1.
-///
-/// This method can be generalized to arbitrary sets of variables by performing
-/// a depth-first traversal of the dominator tree. As we traverse down a branch
-/// of the dominator tree, we keep track of the current dominating variable and
-/// only perform an interference test with that variable. However, when we go to
-/// another branch of the dominator tree, the definition of the current dominating
-/// variable may no longer dominate the current block. In order to correct this,
-/// we need to use a stack of past choices of the current dominating variable
-/// and pop from this stack until we find a variable whose definition actually
-/// dominates the current block.
-///
-/// There will be one push on this stack for each variable that has become the
-/// current dominating variable, so instead of using an explicit stack we can
-/// simply associate the previous choice for a current dominating variable with
-/// the new choice. This works better in our implementation, where we test for
-/// interference in multiple distinct sets at once.
-void
-StrongPHIElimination::SplitInterferencesForBasicBlock(
-    MachineBasicBlock &MBB,
-    DenseMap<unsigned, unsigned> &CurrentDominatingParent,
-    DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
-  // Sort defs by their order in the original basic block, as the code below
-  // assumes that it is processing definitions in dominance order.
-  std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
-  std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
-
-  for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
-       BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
-    for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
-         E = (*BBI)->operands_end(); I != E; ++I) {
-      const MachineOperand &MO = *I;
-
-      // FIXME: This would be faster if it were possible to bail out of checking
-      // an instruction's operands after the explicit defs, but this is incorrect
-      // for variadic instructions, which may appear before register allocation
-      // in the future.
-      if (!MO.isReg() || !MO.isDef())
-        continue;
-
-      unsigned DestReg = MO.getReg();
-      if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
-        continue;
-
-      // If the virtual register being defined is not used in any PHI or has
-      // already been isolated, then there are no more interferences to check.
-      unsigned DestColor = getRegColor(DestReg);
-      if (!DestColor)
-        continue;
-
-      // The input to this pass sometimes is not in SSA form in every basic
-      // block, as some virtual registers have redefinitions. We could eliminate
-      // this by fixing the passes that generate the non-SSA code, or we could
-      // handle it here by tracking defining machine instructions rather than
-      // virtual registers. For now, we just handle the situation conservatively
-      // in a way that will possibly lead to false interferences.
-      unsigned &CurrentParent = CurrentDominatingParent[DestColor];
-      unsigned NewParent = CurrentParent;
-      if (NewParent == DestReg)
-        continue;
-
-      // Pop registers from the stack represented by ImmediateDominatingParent
-      // until we find a parent that dominates the current instruction.
-      while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
-                           || !getRegColor(NewParent)))
-        NewParent = ImmediateDominatingParent[NewParent];
-
-      // If NewParent is nonzero, then its definition dominates the current
-      // instruction, so it is only necessary to check for the liveness of
-      // NewParent in order to check for an interference.
-      if (NewParent
-          && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
-        // If there is an interference, always isolate the new register. This
-        // could be improved by using a heuristic that decides which of the two
-        // registers to isolate.
-        isolateReg(DestReg);
-        CurrentParent = NewParent;
-      } else {
-        // If there is no interference, update ImmediateDominatingParent and set
-        // the CurrentDominatingParent for this color to the current register.
-        ImmediateDominatingParent[DestReg] = NewParent;
-        CurrentParent = DestReg;
-      }
-    }
-  }
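A toy rendering of the two-map scheme the doc comment above describes: CurrentDominatingParent tracks the latest def per congruence class, and ImmediateDominatingParent is the implicit stack that gets popped when the traversal leaves a dominator-tree branch. OnPath and LiveHere are stubs for the real dominance and liveness queries; everything here is illustrative standard C++:

    #include <map>

    struct InterferenceWalk {
      std::map<unsigned, unsigned> CurrentDominatingParent;   // color -> reg
      std::map<unsigned, unsigned> ImmediateDominatingParent; // reg -> reg
      std::map<unsigned, bool> OnPath;   // does def(reg) dominate this point?
      std::map<unsigned, bool> LiveHere; // is reg live at this point?

      // Pop the implicit stack until the parent's def dominates this point.
      unsigned popToDominatingParent(unsigned Color) {
        unsigned R = CurrentDominatingParent[Color];
        while (R && !OnPath[R])
          R = ImmediateDominatingParent[R];
        return R;
      }

      // Process a def of Reg in class Color with a single liveness test,
      // mirroring the O(n) chain argument from the comment above.
      bool defInterferes(unsigned Reg, unsigned Color) {
        unsigned Parent = popToDominatingParent(Color);
        if (Parent && LiveHere[Parent])
          return true; // the pass would isolateReg(Reg) here
        ImmediateDominatingParent[Reg] = Parent;
        CurrentDominatingParent[Color] = Reg;
        return false;
      }
    };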
-
-  // We now walk the PHIs in successor blocks and check for interferences. This
-  // is necessary because the use of a PHI's operands are logically contained in
-  // the predecessor block. The def of a PHI's destination register is processed
-  // along with the other defs in a basic block.
-
-  CurrentPHIForColor.clear();
-
-  for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
-       SE = MBB.succ_end(); SI != SE; ++SI) {
-    for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
-         BBI != BBE && BBI->isPHI(); ++BBI) {
-      MachineInstr *PHI = BBI;
-
-      // If a PHI is already isolated, either by being isolated directly or
-      // having all of its operands isolated, ignore it.
-      unsigned Color = getPHIColor(PHI);
-      if (!Color)
-        continue;
-
-      // Find the index of the PHI operand that corresponds to this basic block.
-      unsigned PredIndex;
-      for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
-        if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
-          break;
-      }
-      assert(PredIndex < PHI->getNumOperands());
-      unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
-
-      // Pop registers from the stack represented by ImmediateDominatingParent
-      // until we find a parent that dominates the current instruction.
-      unsigned &CurrentParent = CurrentDominatingParent[Color];
-      unsigned NewParent = CurrentParent;
-      while (NewParent
-             && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
-                 || !getRegColor(NewParent)))
-        NewParent = ImmediateDominatingParent[NewParent];
-      CurrentParent = NewParent;
-
-      // If there is an interference with a register, always isolate the
-      // register rather than the PHI. It is also possible to isolate the
-      // PHI, but that introduces copies for all of the registers involved
-      // in that PHI.
-      if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
-          && NewParent != PredOperandReg)
-        isolateReg(NewParent);
-
-      std::pair<MachineInstr*, unsigned>
-        &CurrentPHI = CurrentPHIForColor[Color];
-
-      // If two PHIs have the same operand from every shared predecessor, then
-      // they don't actually interfere. Otherwise, isolate the current PHI. This
-      // could possibly be improved, e.g. we could isolate the PHI with the
-      // fewest operands.
-      if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
-        isolatePHI(PHI);
-      else
-        CurrentPHI = std::make_pair(PHI, PredOperandReg);
-    }
-  }
-}
-
-void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
-                                              MachineBasicBlock *MBB) {
-  assert(PHI->isPHI());
-  ++NumPHIsLowered;
-  unsigned PHIColor = getPHIColor(PHI);
-
-  for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
-    MachineOperand &SrcMO = PHI->getOperand(i);
-
-    // If a source is defined by an implicit def, there is no need to insert a
-    // copy in the predecessor.
-    if (SrcMO.isUndef())
-      continue;
-
-    unsigned SrcReg = SrcMO.getReg();
-    assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
-           "Machine PHI Operands must all be virtual registers!");
-
-    MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
-    unsigned SrcColor = getRegColor(SrcReg);
-
-    // If neither the PHI nor the operand were isolated, then we only need to
-    // set the phi-kill flag on the VNInfo at this PHI.
-    if (PHIColor && SrcColor == PHIColor) {
-      LiveInterval &SrcInterval = LI->getInterval(SrcReg);
-      SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
-      VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
-      (void)SrcVNI;
-      assert(SrcVNI);
-      continue;
-    }
-
-    unsigned CopyReg = 0;
-    if (PHIColor) {
-      SrcCopyMap::const_iterator I
-        = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
-      CopyReg
-        = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
-    }
-
-    if (!CopyReg) {
-      const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
-      CopyReg = MRI->createVirtualRegister(RC);
-
-      MachineBasicBlock::iterator
-        CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
-      unsigned SrcSubReg = SrcMO.getSubReg();
-      MachineInstr *CopyInstr = BuildMI(*PredBB,
-                                        CopyInsertPoint,
-                                        PHI->getDebugLoc(),
-                                        TII->get(TargetOpcode::COPY),
-                                        CopyReg).addReg(SrcReg, 0, SrcSubReg);
-      LI->InsertMachineInstrInMaps(CopyInstr);
-      ++NumSrcCopiesInserted;
-
-      // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
-      // the newly added range.
-      LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
-      InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
-
-      addReg(CopyReg);
-      if (PHIColor) {
-        unionRegs(PHIColor, CopyReg);
-        assert(getRegColor(CopyReg) != CopyReg);
-      } else {
-        PHIColor = CopyReg;
-        assert(getRegColor(CopyReg) == CopyReg);
-      }
-
-      // Insert into map if not already there.
-      InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor),
-                                               CopyInstr));
-    }
-
-    SrcMO.setReg(CopyReg);
-
-    // If SrcReg is not live beyond the PHI, trim its interval so that it is no
-    // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
-    // processed later, but this is still correct to do at this point because we
-    // never rely on LiveIntervals being correct while inserting copies.
-    // FIXME: Should this just count uses at PHIs like the normal PHIElimination
-    // pass does?
-    LiveInterval &SrcLI = LI->getInterval(SrcReg);
-    SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
-    SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
-    SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
-    if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
-      SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
-  }
-
-  unsigned DestReg = PHI->getOperand(0).getReg();
-  unsigned DestColor = getRegColor(DestReg);
-
-  if (PHIColor && DestColor == PHIColor) {
-    LiveInterval &DestLI = LI->getInterval(DestReg);
-
-    // Set the phi-def flag for the VN at this PHI.
-    SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
-    VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
-    assert(DestVNI);
-
-    // Prior to PHI elimination, the live ranges of PHIs begin at their defining
-    // instruction. After PHI elimination, PHI instructions are replaced by VNs
-    // with the phi-def flag set, and the live ranges of these VNs start at the
-    // beginning of the basic block.
-    SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
-    DestVNI->def = MBBStartIndex;
-    DestLI.addRange(LiveRange(MBBStartIndex,
-                              PHIIndex.getRegSlot(),
-                              DestVNI));
-    return;
-  }
-
-  const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
-  unsigned CopyReg = MRI->createVirtualRegister(RC);
-
-  MachineInstr *CopyInstr = BuildMI(*MBB,
-                                    MBB->SkipPHIsAndLabels(MBB->begin()),
-                                    PHI->getDebugLoc(),
-                                    TII->get(TargetOpcode::COPY),
-                                    DestReg).addReg(CopyReg);
-  LI->InsertMachineInstrInMaps(CopyInstr);
-  PHI->getOperand(0).setReg(CopyReg);
-  ++NumDestCopiesInserted;
-
-  // Add the region from the beginning of MBB to the copy instruction to
-  // CopyReg's live interval, and give the VNInfo the phidef flag.
-  LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
-  SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
-  SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
-  VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
-                                        LI->getVNInfoAllocator());
-  CopyLI.addRange(LiveRange(MBBStartIndex,
-                            DestCopyIndex.getRegSlot(),
-                            CopyVNI));
-
-  // Adjust DestReg's live interval to adjust for its new definition at
-  // CopyInstr.
-  LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
-  SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
-  DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot());
-
-  VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
-  assert(DestVNI);
-  DestVNI->def = DestCopyIndex.getRegSlot();
-
-  InsertedDestCopies[CopyReg] = CopyInstr;
-}
-
-void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
-  if (Reg == NewReg)
-    return;
-
-  LiveInterval &OldLI = LI->getInterval(Reg);
-  LiveInterval &NewLI = LI->getInterval(NewReg);
-
-  // Merge the live ranges of the two registers.
-  DenseMap<VNInfo*, VNInfo*> VNMap;
-  for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
-       LRI != LRE; ++LRI) {
-    LiveRange OldLR = *LRI;
-    VNInfo *OldVN = OldLR.valno;
-
-    VNInfo *&NewVN = VNMap[OldVN];
-    if (!NewVN) {
-      NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
-      VNMap[OldVN] = NewVN;
-    }
-
-    LiveRange LR(OldLR.start, OldLR.end, NewVN);
-    NewLI.addRange(LR);
-  }
-
-  // Remove the LiveInterval for the register being renamed and replace all
-  // of its defs and uses with the new register.
-  LI->removeInterval(Reg);
-  MRI->replaceRegWith(Reg, NewReg);
-}
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index 1ec8817..ff0181e 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -86,7 +86,7 @@ namespace {
     void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
                     MachineBasicBlock *PredBB,
                     DenseMap<unsigned, unsigned> &LocalVRMap,
-                    SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
+                    SmallVectorImpl<std::pair<unsigned,unsigned> > &Copies,
                     const DenseSet<unsigned> &UsedByPhi,
                     bool Remove);
     void DuplicateInstruction(MachineInstr *MI,
@@ -96,7 +96,7 @@ namespace {
                               DenseMap<unsigned, unsigned> &LocalVRMap,
                               const DenseSet<unsigned> &UsedByPhi);
     void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
-                              SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                              SmallVectorImpl<MachineBasicBlock *> &TDBBs,
                               SmallSetVector<MachineBasicBlock*, 8> &Succs);
     bool TailDuplicateBlocks(MachineFunction &MF);
     bool shouldTailDuplicate(const MachineFunction &MF,
@@ -104,14 +104,14 @@ namespace {
     bool isSimpleBB(MachineBasicBlock *TailBB);
     bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
     bool duplicateSimpleBB(MachineBasicBlock *TailBB,
-                           SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                           SmallVectorImpl<MachineBasicBlock *> &TDBBs,
                            const DenseSet<unsigned> &RegsUsedByPhi,
-                           SmallVector<MachineInstr*, 16> &Copies);
+                           SmallVectorImpl<MachineInstr *> &Copies);
     bool TailDuplicate(MachineBasicBlock *TailBB,
                        bool IsSimple,
                        MachineFunction &MF,
-                       SmallVector<MachineBasicBlock*, 8> &TDBBs,
-                       SmallVector<MachineInstr*, 16> &Copies);
+                       SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+                       SmallVectorImpl<MachineInstr *> &Copies);
     bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
                                 bool IsSimple,
                                 MachineFunction &MF);
@@ -382,13 +382,11 @@ void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
 /// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
 /// Remember the source register that's contributed by PredBB and update SSA
 /// update map.
-void TailDuplicatePass::ProcessPHI(MachineInstr *MI,
-                                   MachineBasicBlock *TailBB,
-                                   MachineBasicBlock *PredBB,
-                                   DenseMap<unsigned, unsigned> &LocalVRMap,
-                                   SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
-                                   const DenseSet<unsigned> &RegsUsedByPhi,
-                                   bool Remove) {
+void TailDuplicatePass::ProcessPHI(
+    MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
+    DenseMap<unsigned, unsigned> &LocalVRMap,
+    SmallVectorImpl<std::pair<unsigned, unsigned> > &Copies,
+    const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
   unsigned DefReg = MI->getOperand(0).getReg();
   unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
   assert(SrcOpIdx && "Unable to find matching PHI source?");
@@ -452,7 +450,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
 /// instructions in them accordingly.
 void
 TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
-                                  SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                                  SmallVectorImpl<MachineBasicBlock *> &TDBBs,
                                   SmallSetVector<MachineBasicBlock*,8> &Succs) {
   for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
        SE = Succs.end(); SI != SE; ++SI) {
@@ -640,8 +638,6 @@ bothUsedInPHI(const MachineBasicBlock &A,
 
 bool
 TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
-  SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
-
   for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
        PE = BB.pred_end(); PI != PE; ++PI) {
     MachineBasicBlock *PredBB = *PI;
@@ -662,9 +658,9 @@ TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
 
 bool
 TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
-                                     SmallVector<MachineBasicBlock*, 8> &TDBBs,
-                                     const DenseSet<unsigned> &UsedByPhi,
-                                     SmallVector<MachineInstr*, 16> &Copies) {
+                                     SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+                                     const DenseSet<unsigned> &UsedByPhi,
+                                     SmallVectorImpl<MachineInstr *> &Copies) {
   SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
                                            TailBB->succ_end());
   SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
@@ -742,8 +738,8 @@ bool
 TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
                                  bool IsSimple,
                                  MachineFunction &MF,
-                                 SmallVector<MachineBasicBlock*, 8> &TDBBs,
-                                 SmallVector<MachineInstr*, 16> &Copies) {
+                                 SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+                                 SmallVectorImpl<MachineInstr *> &Copies) {
   DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
 
   DenseSet<unsigned> UsedByPhi;
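The signature churn in this file is one idiom applied repeatedly: callees take SmallVectorImpl<T>& so the caller's inline element count is no longer baked into the callee's type. A minimal sketch against the real llvm/ADT API:

    #include "llvm/ADT/SmallVector.h"
    using llvm::SmallVector;
    using llvm::SmallVectorImpl;

    // One definition serves every inline size: no template bloat and no
    // forced copies when the caller's N differs.
    static void collectEvens(SmallVectorImpl<int> &Out, int Limit) {
      for (int I = 0; I < Limit; I += 2)
        Out.push_back(I);
    }

    void demo() {
      SmallVector<int, 8> A;
      SmallVector<int, 2> B; // different inline size, same callee
      collectEvens(A, 10);
      collectEvens(B, 10);
    }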
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 20eb918..bf4fd65 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
@@ -276,6 +277,36 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
   return false;
 }
 
+bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
+                                        unsigned SubIdx, unsigned &Size,
+                                        unsigned &Offset,
+                                        const TargetMachine *TM) const {
+  if (!SubIdx) {
+    Size = RC->getSize();
+    Offset = 0;
+    return true;
+  }
+  unsigned BitSize = TM->getRegisterInfo()->getSubRegIdxSize(SubIdx);
+  // Convert bit size to byte size to be consistent with
+  // MCRegisterClass::getSize().
+  if (BitSize % 8)
+    return false;
+
+  int BitOffset = TM->getRegisterInfo()->getSubRegIdxOffset(SubIdx);
+  if (BitOffset < 0 || BitOffset % 8)
+    return false;
+
+  Size = BitSize /= 8;
+  Offset = (unsigned)BitOffset / 8;
+
+  assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
+
+  if (!TM->getDataLayout()->isLittleEndian()) {
+    Offset = RC->getSize() - (Offset + Size);
+  }
+  return true;
+}
+
 void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     unsigned DestReg,
@@ -364,6 +395,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
 
   // Ask the target to do the actual folding.
   if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+    NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
     // Add a memory operand, foldMemoryOperandImpl doesn't do that.
     assert((!(Flags & MachineMemOperand::MOStore) ||
             NewMI->mayStore()) &&
@@ -424,9 +456,19 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
   NewMI = MBB.insert(MI, NewMI);
 
   // Copy the memoperands from the load to the folded instruction.
-  NewMI->setMemRefs(LoadMI->memoperands_begin(),
-                    LoadMI->memoperands_end());
-
+  if (MI->memoperands_empty()) {
+    NewMI->setMemRefs(LoadMI->memoperands_begin(),
+                      LoadMI->memoperands_end());
+  }
+  else {
+    // Handle the rare case of folding multiple loads.
+    NewMI->setMemRefs(MI->memoperands_begin(),
+                      MI->memoperands_end());
+    for (MachineInstr::mmo_iterator I = LoadMI->memoperands_begin(),
+         E = LoadMI->memoperands_end(); I != E; ++I) {
+      NewMI->addMemOperand(MF, *I);
+    }
+  }
   return NewMI;
 }
 
@@ -630,6 +672,10 @@ unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
   return 1;
 }
 
+unsigned TargetInstrInfo::getPredicationCost(const MachineInstr *) const {
+  return 0;
+}
+
 unsigned TargetInstrInfo::
 getInstrLatency(const InstrItineraryData *ItinData,
                 const MachineInstr *MI,
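For the getStackSlotRange hunk above, the arithmetic in isolation: subregister sizes and offsets arrive in bits, are converted to bytes, and on big-endian targets the byte offset is mirrored within the slot. A hedged standalone rendering with illustrative names:

    #include <cassert>

    // Returns false when the subregister is not byte-aligned, mirroring the
    // bail-outs in the hunk. SlotSize/Size/Offset are bytes, Bit* are bits.
    bool subRegByteRange(unsigned SlotSize, unsigned BitSize, int BitOffset,
                         bool LittleEndian, unsigned &Size, unsigned &Offset) {
      if (BitSize % 8 || BitOffset < 0 || BitOffset % 8)
        return false;
      Size = BitSize / 8;
      Offset = (unsigned)BitOffset / 8;
      assert(SlotSize >= Offset + Size && "bad subregister range");
      if (!LittleEndian)
        Offset = SlotSize - (Offset + Size); // mirror within the slot
      return true;
    }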
@@ -668,27 +714,13 @@ getOperandLatency(const InstrItineraryData *ItinData,
 /// lookup, do so. Otherwise return -1.
 int TargetInstrInfo::computeDefOperandLatency(
     const InstrItineraryData *ItinData,
-    const MachineInstr *DefMI, bool FindMin) const {
+    const MachineInstr *DefMI) const {
 
   // Let the target hook getInstrLatency handle missing itineraries.
   if (!ItinData)
     return getInstrLatency(ItinData, DefMI);
 
-  // Return a latency based on the itinerary properties and defining instruction
-  // if possible. Some common subtargets don't require per-operand latency,
-  // especially for minimum latencies.
-  if (FindMin) {
-    // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
-    // it exists before defaulting to MinLatency.
-    if (ItinData->SchedModel->MinLatency >= 0)
-      return getInstrLatency(ItinData, DefMI);
-
-    // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
-    // For empty itineraries, short-cirtuit the check and default to one cycle.
-    if (ItinData->isEmpty())
-      return 1;
-  }
-  else if(ItinData->isEmpty())
+  if(ItinData->isEmpty())
     return defaultDefLatency(ItinData->SchedModel, DefMI);
 
   // ...operand lookup required
@@ -709,10 +741,9 @@ int TargetInstrInfo::computeDefOperandLatency(
 unsigned TargetInstrInfo::
 computeOperandLatency(const InstrItineraryData *ItinData,
                       const MachineInstr *DefMI, unsigned DefIdx,
-                      const MachineInstr *UseMI, unsigned UseIdx,
-                      bool FindMin) const {
+                      const MachineInstr *UseMI, unsigned UseIdx) const {
 
-  int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin);
+  int DefLatency = computeDefOperandLatency(ItinData, DefMI);
   if (DefLatency >= 0)
     return DefLatency;
 
@@ -732,8 +763,7 @@ computeOperandLatency(const InstrItineraryData *ItinData,
   unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
 
   // Expected latency is the max of the stage latency and itinerary props.
-  if (!FindMin)
-    InstrLatency = std::max(InstrLatency,
-                            defaultDefLatency(ItinData->SchedModel, DefMI));
+  InstrLatency = std::max(InstrLatency,
+                          defaultDefLatency(ItinData->SchedModel, DefMI));
   return InstrLatency;
 }
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 8074d16..30305af 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -191,6 +191,11 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
   Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
   Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
   Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+  Names[RTLIB::ROUND_F32] = "roundf";
+  Names[RTLIB::ROUND_F64] = "round";
+  Names[RTLIB::ROUND_F80] = "roundl";
+  Names[RTLIB::ROUND_F128] = "roundl";
+  Names[RTLIB::ROUND_PPCF128] = "roundl";
   Names[RTLIB::FLOOR_F32] = "floorf";
   Names[RTLIB::FLOOR_F64] = "floor";
   Names[RTLIB::FLOOR_F80] = "floorl";
@@ -313,34 +318,62 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
   Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
   Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
   Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = "__sync_val_compare_and_swap_16";
   Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
   Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
   Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
   Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = "__sync_lock_test_and_set_16";
   Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
   Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
   Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
   Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+  Names[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16";
   Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
   Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
   Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
   Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+  Names[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16";
   Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
   Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
   Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
   Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+  Names[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16";
   Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
   Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
   Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
   Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+  Names[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16";
   Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
   Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
   Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
   Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16";
   Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
   Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
   Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
   Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
+  Names[RTLIB::SYNC_FETCH_AND_NAND_16] = "__sync_fetch_and_nand_16";
+  Names[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1";
+  Names[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2";
+  Names[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4";
+  Names[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8";
+  Names[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16";
+  Names[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1";
+  Names[RTLIB::SYNC_FETCH_AND_UMAX_2] = "__sync_fetch_and_umax_2";
+  Names[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4";
+  Names[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8";
+  Names[RTLIB::SYNC_FETCH_AND_UMAX_16] = "__sync_fetch_and_umax_16";
+  Names[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1";
+  Names[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2";
+  Names[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4";
+  Names[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8";
+  Names[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16";
+  Names[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1";
+  Names[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2";
+  Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4";
+  Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8";
+  Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16";
 
   if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) {
     Names[RTLIB::SINCOS_F32] = "sincosf";
@@ -356,6 +389,13 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
     Names[RTLIB::SINCOS_F128] = 0;
     Names[RTLIB::SINCOS_PPCF128] = 0;
   }
+
+  if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) {
+    Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
+  } else {
+    // These are generally not available.
+    Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = 0;
+  }
 }
 
 /// InitLibcallCallingConvs - Set default libcall CallingConvs.
@@ -624,7 +664,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
 
   // Perform these initializations only once.
   IsLittleEndian = TD->isLittleEndian();
-  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
   MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
   MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
     = MaxStoresPerMemmoveOptSize = 4;
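The new __sync_fetch_and_{max,umax,min,umin}_N entries name runtime helpers with the usual fetch-and-op contract. A hedged sketch of that contract in portable C++, with std::atomic standing in for whatever the runtime actually provides:

    #include <atomic>

    // Atomically store max(old, V) and return the old value, which is what
    // a __sync_fetch_and_max_4 helper is expected to do for a 4-byte int.
    int fetchAndMax(std::atomic<int> &A, int V) {
      int Old = A.load();
      while (!A.compare_exchange_weak(Old, Old > V ? Old : V)) {
        // Old is reloaded by compare_exchange_weak on failure; retry.
      }
      return Old;
    }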
@@ -682,6 +721,14 @@ void TargetLoweringBase::initActions() {
     // These operations default to expand.
     setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+
+    // These library functions default to expand.
+    setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand);
+
+    // These operations default to expand for vector types.
+    if (VT >= MVT::FIRST_VECTOR_VALUETYPE &&
+        VT <= MVT::LAST_VECTOR_VALUETYPE)
+      setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
   }
 
   // Most targets ignore the @llvm.prefetch intrinsic.
@@ -747,6 +794,19 @@ void TargetLoweringBase::initActions() {
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
 }
 
+MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
+  return MVT::getIntegerVT(getPointerSizeInBits(AS));
+}
+
+unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
+  return TD->getPointerSizeInBits(AS);
+}
+
+unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
+  assert(Ty->isPointerTy());
+  return getPointerSizeInBits(Ty->getPointerAddressSpace());
+}
+
 MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
   return MVT::getIntegerVT(8*TD->getPointerSize(0));
 }
@@ -1033,7 +1093,7 @@ void TargetLoweringBase::computeRegisterProperties() {
   }
 }
 
-EVT TargetLoweringBase::getSetCCResultType(EVT VT) const {
+EVT TargetLoweringBase::getSetCCResultType(LLVMContext &, EVT VT) const {
   assert(!VT.isVector() && "No default SetCC type for vectors!");
   return getPointerTy(0).SimpleTy;
 }
@@ -1162,7 +1222,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
       Flags.setZExt();
 
     for (unsigned i = 0; i < NumParts; ++i)
-      Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
+      Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0));
   }
 }
 
@@ -1228,6 +1288,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
   case PtrToInt:       return ISD::BITCAST;
   case IntToPtr:       return ISD::BITCAST;
   case BitCast:        return ISD::BITCAST;
+  case AddrSpaceCast:  return ISD::ADDRSPACECAST;
   case ICmp:           return ISD::SETCC;
   case FCmp:           return ISD::SETCC;
   case PHI:            return 0;
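Replacing the single cached PointerTy with getPointerTy(AS) is what lets targets with mixed pointer widths work. A toy model of the per-address-space lookup; the table and names are illustrative only:

    // Toy model: pointer width now depends on the address space instead of
    // one value cached at construction time.
    unsigned pointerSizeInBits(unsigned AS) {
      return AS == 1 ? 32 : 64; // e.g. a 32-bit device space beside 64-bit
    }

    enum SimpleVT { i32, i64 };

    // Shape of MVT::getIntegerVT(getPointerSizeInBits(AS)).
    SimpleVT pointerVT(unsigned AS) {
      return pointerSizeInBits(AS) == 32 ? i32 : i64;
    }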
StringRef Group = ""; unsigned Flags = getELFSectionFlags(Kind); @@ -523,6 +523,11 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { + + // Handle thread local data. + if (Kind.isThreadBSS()) return TLSBSSSection; + if (Kind.isThreadData()) return TLSDataSection; + if (Kind.isText()) return GV->isWeakForLinker() ? TextCoalSection : TextSection; @@ -575,10 +580,6 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isBSSLocal()) return DataBSSSection; - // Handle thread local data. - if (Kind.isThreadBSS()) return TLSBSSSection; - if (Kind.isThreadData()) return TLSDataSection; - // Otherwise, just drop the variable in the normal data section. return DataSection; } @@ -613,7 +614,7 @@ shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { // FIXME: ObjC metadata is currently emitted as internal symbols that have // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and // this horrible hack can go away. - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l') return false; } @@ -642,7 +643,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -671,7 +672,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -726,14 +727,14 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, if (GV->isWeakForLinker()) { Selection = COFF::IMAGE_COMDAT_SELECT_ANY; Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - MCSymbol *Sym = Mang->getSymbol(GV); Name.append("$"); - Name.append(Sym->getName().begin() + 1, Sym->getName().end()); + Mang->getNameWithPrefix(Name, GV, false, false); } return getContext().getCOFFSection(Name, Characteristics, - Selection, - Kind); + Kind, + "", + Selection); } static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { @@ -761,24 +762,29 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (GV->isWeakForLinker()) { const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - MCSymbol *Sym = Mang->getSymbol(GV); - Name.append(Sym->getName().begin() + 1, Sym->getName().end()); + Mang->getNameWithPrefix(Name, GV, false, false); unsigned Characteristics = getCOFFSectionFlags(Kind); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; return getContext().getCOFFSection(Name.str(), Characteristics, - COFF::IMAGE_COMDAT_SELECT_ANY, Kind); + Kind, "", COFF::IMAGE_COMDAT_SELECT_ANY); } if (Kind.isText()) - return getTextSection(); + return TextSection; if (Kind.isThreadLocal()) - return getTLSDataSection(); + return TLSDataSection; - return getDataSection(); + if (Kind.isReadOnly()) + return ReadOnlySection; + + if (Kind.isBSS()) + return 
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 435a5e7..f7bf86b 100644
--- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//

+#include "llvm/IR/Function.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/Target/TargetOptions.h"
@@ -21,7 +22,8 @@ using namespace llvm;
 bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
   // Check to see if we should eliminate non-leaf frame pointers and then
   // check to see if we should eliminate all frame pointers.
-  if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+  if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf") &&
+      !NoFramePointerElim) {
     const MachineFrameInfo *MFI = MF.getFrameInfo();
     return MFI->hasCalls();
   }
@@ -49,30 +51,3 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const {
 StringRef TargetOptions::getTrapFunctionName() const {
   return TrapFuncName;
 }
-
-bool TargetOptions::operator==(const TargetOptions &TO) {
-#define ARE_EQUAL(X) X == TO.X
-  return
-    ARE_EQUAL(UnsafeFPMath) &&
-    ARE_EQUAL(NoInfsFPMath) &&
-    ARE_EQUAL(NoNaNsFPMath) &&
-    ARE_EQUAL(HonorSignDependentRoundingFPMathOption) &&
-    ARE_EQUAL(UseSoftFloat) &&
-    ARE_EQUAL(NoZerosInBSS) &&
-    ARE_EQUAL(JITExceptionHandling) &&
-    ARE_EQUAL(JITEmitDebugInfo) &&
-    ARE_EQUAL(JITEmitDebugInfoToDisk) &&
-    ARE_EQUAL(GuaranteedTailCallOpt) &&
-    ARE_EQUAL(DisableTailCalls) &&
-    ARE_EQUAL(StackAlignmentOverride) &&
-    ARE_EQUAL(RealignStack) &&
-    ARE_EQUAL(SSPBufferSize) &&
-    ARE_EQUAL(EnableFastISel) &&
-    ARE_EQUAL(PositionIndependentExecutable) &&
-    ARE_EQUAL(EnableSegmentedStacks) &&
-    ARE_EQUAL(UseInitArray) &&
-    ARE_EQUAL(TrapFuncName) &&
-    ARE_EQUAL(FloatABIType) &&
-    ARE_EQUAL(AllowFPOpFusion);
-#undef ARE_EQUAL
-}
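The TargetOptionsImpl hunk replaces the global NoFramePointerElimNonLeaf flag with a per-function string attribute, so frame-pointer policy can now vary between functions in one module. A standalone model of the predicate (hypothetical types; the trailing return of NoFramePointerElim is assumed from the unshown context):

#include <iostream>
#include <set>
#include <string>

// Minimal stand-ins for IR function attributes and MachineFrameInfo.
struct FuncModel {
  std::set<std::string> Attrs; // function attributes by name
  bool HasCalls;               // MachineFrameInfo::hasCalls() stand-in
  bool hasFnAttribute(const std::string &A) const { return Attrs.count(A) != 0; }
};

bool disableFPElim(const FuncModel &F, bool NoFramePointerElim) {
  // Non-leaf functions keep their frame pointer when the attribute asks
  // for it and frame-pointer elimination is otherwise allowed.
  if (F.hasFnAttribute("no-frame-pointer-elim-non-leaf") && !NoFramePointerElim)
    return F.HasCalls;
  return NoFramePointerElim; // assumed fallthrough, not shown in the hunk
}

int main() {
  FuncModel F{{"no-frame-pointer-elim-non-leaf"}, /*HasCalls=*/true};
  std::cout << disableFPElim(F, false) << '\n'; // 1: FP kept in this non-leaf
}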
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 84b4bfc..5a15243 100644
--- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -23,10 +23,12 @@ using namespace llvm;
 TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
                              regclass_iterator RCB, regclass_iterator RCE,
                              const char *const *SRINames,
-                             const unsigned *SRILaneMasks)
+                             const unsigned *SRILaneMasks,
+                             unsigned SRICoveringLanes)
   : InfoDesc(ID), SubRegIndexNames(SRINames),
     SubRegIndexLaneMasks(SRILaneMasks),
-    RegClassBegin(RCB), RegClassEnd(RCE) {
+    RegClassBegin(RCB), RegClassEnd(RCE),
+    CoveringLanes(SRICoveringLanes) {
 }

 TargetRegisterInfo::~TargetRegisterInfo() {}
@@ -71,6 +73,14 @@ void PrintRegUnit::print(raw_ostream &OS) const {
   OS << '~' << TRI->getName(*Roots);
 }

+void PrintVRegOrUnit::print(raw_ostream &OS) const {
+  if (TRI && TRI->isVirtualRegister(Unit)) {
+    OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit);
+    return;
+  }
+  PrintRegUnit::print(OS);
+}
+
 /// getAllocatableClass - Return the maximal subclass of the given register
 /// class that is alloctable, or NULL.
 const TargetRegisterClass *
@@ -83,7 +93,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
        Base < BaseE; Base += 32) {
     unsigned Idx = Base;
     for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) {
-      unsigned Offset = CountTrailingZeros_32(Mask);
+      unsigned Offset = countTrailingZeros(Mask);
       const TargetRegisterClass *SubRC = getRegClass(Idx + Offset);
       if (SubRC->isAllocatable())
         return SubRC;
@@ -153,7 +163,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A,
                                             const TargetRegisterInfo *TRI) {
   for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
     if (unsigned Common = *A++ & *B++)
-      return TRI->getRegClass(I + CountTrailingZeros_32(Common));
+      return TRI->getRegClass(I + countTrailingZeros(Common));
   return 0;
 }
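Both CountTrailingZeros_32 call sites simply select the lowest set bit of a 32-bit mask word; the rename to countTrailingZeros changes no behavior. A self-contained sketch of the firstCommonClass scan, with a portable stand-in for the bit intrinsic:

#include <cstdint>
#include <cstdio>

// Portable stand-in for llvm::countTrailingZeros on a nonzero 32-bit value.
static unsigned ctz32(uint32_t V) {
  unsigned N = 0;
  while (!(V & 1)) { V >>= 1; ++N; }
  return N;
}

// Return the index of the first class present in both bit sets, or -1.
// Classes are tested 32 at a time, exactly like the loop above.
int firstCommonIndex(const uint32_t *A, const uint32_t *B, unsigned NumClasses) {
  for (unsigned I = 0; I < NumClasses; I += 32)
    if (uint32_t Common = *A++ & *B++)
      return I + ctz32(Common);
  return -1;
}

int main() {
  uint32_t A[2] = {0x0000F0F0u, 0x1u};
  uint32_t B[2] = {0x0000C000u, 0x1u};
  std::printf("%d\n", firstCommonIndex(A, B, 64)); // common bit 14 -> prints 14
}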
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index 1bf14db..b0f2ca6 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -93,33 +93,10 @@ unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
 // effectively means infinite latency. Since users of the TargetSchedule API
 // don't know how to handle this, we convert it to a very large latency that is
 // easy to distinguish when debugging the DAG but won't induce overflow.
-static unsigned convertLatency(int Cycles) {
+static unsigned capLatency(int Cycles) {
   return Cycles >= 0 ? Cycles : 1000;
 }

-/// If we can determine the operand latency from the def only, without machine
-/// model or itinerary lookup, do so. Otherwise return -1.
-int TargetSchedModel::getDefLatency(const MachineInstr *DefMI,
-                                    bool FindMin) const {
-
-  // Return a latency based on the itinerary properties and defining instruction
-  // if possible. Some common subtargets don't require per-operand latency,
-  // especially for minimum latencies.
-  if (FindMin) {
-    // If MinLatency is invalid, then use the itinerary for MinLatency. If no
-    // itinerary exists either, then use single cycle latency.
-    if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) {
-      return 1;
-    }
-    return SchedModel.MinLatency;
-  }
-  else if (!hasInstrSchedModel() && !hasInstrItineraries()) {
-    return TII->defaultDefLatency(&SchedModel, DefMI);
-  }
-  // ...operand lookup required
-  return -1;
-}
-
 /// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
 /// evaluation of predicates that depend on instruction operands or flags.
 const MCSchedClassDesc *TargetSchedModel::
@@ -177,18 +154,16 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
 // Top-level API for clients that know the operand indices.
 unsigned TargetSchedModel::computeOperandLatency(
   const MachineInstr *DefMI, unsigned DefOperIdx,
-  const MachineInstr *UseMI, unsigned UseOperIdx,
-  bool FindMin) const {
+  const MachineInstr *UseMI, unsigned UseOperIdx) const {

-  int DefLatency = getDefLatency(DefMI, FindMin);
-  if (DefLatency >= 0)
-    return DefLatency;
+  if (!hasInstrSchedModel() && !hasInstrItineraries())
+    return TII->defaultDefLatency(&SchedModel, DefMI);

   if (hasInstrItineraries()) {
     int OperLatency = 0;
     if (UseMI) {
-      OperLatency =
-        TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx);
+      OperLatency = TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx,
+                                           UseMI, UseOperIdx);
     }
     else {
       unsigned DefClass = DefMI->getDesc().getSchedClass();
@@ -205,13 +180,11 @@ unsigned TargetSchedModel::computeOperandLatency(
     // hook to allow subtargets to specialize latency. This hook is only
     // applicable to the InstrItins model. InstrSchedModel should model all
     // special cases without TII hooks.
-    if (!FindMin)
-      InstrLatency = std::max(InstrLatency,
-                              TII->defaultDefLatency(&SchedModel, DefMI));
+    InstrLatency = std::max(InstrLatency,
+                            TII->defaultDefLatency(&SchedModel, DefMI));
     return InstrLatency;
   }
-  assert(!FindMin && hasInstrSchedModel() &&
-         "Expected a SchedModel for this cpu");
+  // hasInstrSchedModel()
   const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
   unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
   if (DefIdx < SCDesc->NumWriteLatencyEntries) {
@@ -219,7 +192,7 @@ unsigned TargetSchedModel::computeOperandLatency(
     const MCWriteLatencyEntry *WLEntry =
       STI->getWriteLatencyEntry(SCDesc, DefIdx);
     unsigned WriteID = WLEntry->WriteResourceID;
-    unsigned Latency = convertLatency(WLEntry->Cycles);
+    unsigned Latency = capLatency(WLEntry->Cycles);
     if (!UseMI)
       return Latency;

@@ -228,13 +201,17 @@ unsigned TargetSchedModel::computeOperandLatency(
     if (UseDesc->NumReadAdvanceEntries == 0)
       return Latency;
     unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
-    return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+    int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+    if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
+      return 0;
+    return Latency - Advance;
   }
   // If DefIdx does not exist in the model (e.g. implicit defs), then return
   // unit latency (defaultDefLatency may be too conservative).
 #ifndef NDEBUG
   if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
-      && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) {
+      && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
+      && SchedModel.isComplete()) {
     std::string Err;
     raw_string_ostream ss(Err);
     ss << "DefIdx " << DefIdx << " exceeds machine model writes for "
@@ -248,10 +225,13 @@ unsigned TargetSchedModel::computeOperandLatency(
   return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
 }

-unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
+unsigned
+TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
+                                      bool UseDefaultDefLatency) const {
   // For the itinerary model, fall back to the old subtarget hook.
   // Allow subtargets to compute Bundle latencies outside the machine model.
-  if (hasInstrItineraries() || MI->isBundle())
+  if (hasInstrItineraries() || MI->isBundle() ||
+      (!hasInstrSchedModel() && !UseDefaultDefLatency))
     return TII->getInstrLatency(&InstrItins, MI);

   if (hasInstrSchedModel()) {
@@ -263,7 +243,7 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
       // Lookup the definition's write latency in SubtargetInfo.
       const MCWriteLatencyEntry *WLEntry =
         STI->getWriteLatencyEntry(SCDesc, DefIdx);
-      Latency = std::max(Latency, convertLatency(WLEntry->Cycles));
+      Latency = std::max(Latency, capLatency(WLEntry->Cycles));
     }
     return Latency;
   }
@@ -274,13 +254,10 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
 unsigned TargetSchedModel::
 computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
                      const MachineInstr *DepMI) const {
-  // MinLatency == -1 is for in-order processors that always have unit
-  // MinLatency. MinLatency > 0 is for in-order processors with varying min
-  // latencies, but since this is not a RAW dep, we always use unit latency.
-  if (SchedModel.MinLatency != 0)
+  if (SchedModel.MicroOpBufferSize <= 1)
     return 1;

-  // MinLatency == 0 indicates an out-of-order processor that can dispatch
+  // MicroOpBufferSize > 1 indicates an out-of-order processor that can dispatch
   // WAW dependencies in the same cycle.

   // Treat predication as a data dependency for out-of-order cpus. In-order
@@ -302,7 +279,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
   if (SCDesc->isValid()) {
     for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
            *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
-      if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
+      if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize)
         return 1;
     }
   }
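Two behavioral details in these hunks are easy to miss: capLatency() maps the "invalid" negative cycle count to a large sentinel instead of propagating it, and the read-advance subtraction is now clamped at zero rather than underflowing as unsigned. A small self-contained sketch of both (applyReadAdvance is a hypothetical helper mirroring the inline code):

#include <cassert>

// Negative cycle counts mean "invalid/infinite"; cap them at a sentinel.
static unsigned capLatency(int Cycles) {
  return Cycles >= 0 ? Cycles : 1000;
}

// Clamped subtraction: a read-advance larger than the write latency now
// yields 0 instead of wrapping to a huge unsigned value.
static unsigned applyReadAdvance(unsigned Latency, int Advance) {
  if (Advance > 0 && (unsigned)Advance > Latency)
    return 0;
  return Latency - Advance; // a negative Advance increases the latency
}

int main() {
  assert(capLatency(-1) == 1000);        // sentinel, not a wrapped value
  assert(applyReadAdvance(2, 3) == 0);   // clamped instead of wrapping
  assert(applyReadAdvance(4, 1) == 3);   // ordinary case unchanged
  return 0;
}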
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 7ca2bee..b9a6b47 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1400,7 +1400,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
       VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
       SlotIndex endIdx =
         LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber);
-      LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI));
+      LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI));
     }
   }

@@ -1457,7 +1457,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
         SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
         if (I->end == UseIdx)
-          LI.removeRange(LastCopyIdx, UseIdx);
+          LI.removeSegment(LastCopyIdx, UseIdx);
       }
     }
   } else if (RemovedKillFlag) {
@@ -1539,7 +1539,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
       // transformations that may either eliminate the tied operands or
      // improve the opportunities for coalescing away the register copy.
       if (TiedOperands.size() == 1) {
-        SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs
+        SmallVectorImpl<std::pair<unsigned, unsigned> > &TiedPairs
           = TiedOperands.begin()->second;
         if (TiedPairs.size() == 1) {
           unsigned SrcIdx = TiedPairs[0].first;
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index a95ebcd..f735ef2 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -24,7 +24,6 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -50,7 +49,6 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addPreserved<DominatorTree>();
-      AU.addPreserved<ProfileInfo>();
     }
   };
 }
@@ -87,9 +85,7 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
   }

   // Actually remove the blocks now.
-  ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>();
   for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
-    if (PI) PI->removeBlock(DeadBlocks[i]);
     DeadBlocks[i]->eraseFromParent();
   }
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index cd012d2..e0aa405 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
@@ -264,15 +265,36 @@ void VirtRegRewriter::rewrite() {
   SmallVector<unsigned, 8> SuperDeads;
   SmallVector<unsigned, 8> SuperDefs;
   SmallVector<unsigned, 8> SuperKills;
+  SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;

   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
        MBBI != MBBE; ++MBBI) {
     DEBUG(MBBI->print(dbgs(), Indexes));
+    bool IsExitBB = MBBI->succ_empty();
     for (MachineBasicBlock::instr_iterator
            MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
       MachineInstr *MI = MII;
       ++MII;

+      // Check if this instruction is a call to a noreturn function.
+      // If so, all the definitions set by this instruction can be ignored.
+      if (IsExitBB && MI->isCall())
+        for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+             MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+          MachineOperand &MO = *MOI;
+          if (!MO.isGlobal())
+            continue;
+          const Function *Func = dyn_cast<Function>(MO.getGlobal());
+          if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) ||
+              // We need to keep correct unwind information
+              // even if the function will not return, since the
+              // runtime may need it.
+              !Func->hasFnAttribute(Attribute::NoUnwind))
+            continue;
+          NoReturnInsts.insert(MI);
+          break;
+        }
+
       for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
            MOE = MI->operands_end(); MOI != MOE; ++MOI) {
         MachineOperand &MO = *MOI;
@@ -353,7 +375,25 @@ void VirtRegRewriter::rewrite() {
   }

   // Tell MRI about physical registers in use.
-  for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
-    if (!MRI->reg_nodbg_empty(Reg))
-      MRI->setPhysRegUsed(Reg);
+  if (NoReturnInsts.empty()) {
+    for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+      if (!MRI->reg_nodbg_empty(Reg))
+        MRI->setPhysRegUsed(Reg);
+  } else {
+    for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) {
+      if (MRI->reg_nodbg_empty(Reg))
+        continue;
+      // Check if this register has a use that will impact the rest of the
+      // code. Uses in debug and noreturn instructions do not impact the
+      // generated code.
+      for (MachineRegisterInfo::reg_nodbg_iterator It =
+             MRI->reg_nodbg_begin(Reg),
+             EndIt = MRI->reg_nodbg_end(); It != EndIt; ++It) {
+        if (!NoReturnInsts.count(&(*It))) {
+          MRI->setPhysRegUsed(Reg);
+          break;
+        }
+      }
+    }
+  }
 }
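The VirtRegMap change takes effect at the very end of rewrite(): a physical register whose only non-debug users are calls to noreturn+nounwind functions is no longer reported to MachineRegisterInfo as used, since definitions at such calls can never be observed. A standalone model of that final test (hypothetical types, not the LLVM API):

#include <iostream>
#include <set>
#include <vector>

struct InstrModel { bool IsNoReturnCall; }; // stand-in for MachineInstr

// A register matters only if some user is outside the noreturn-call set.
bool physRegIsUsed(const std::vector<const InstrModel *> &Users,
                   const std::set<const InstrModel *> &NoReturnInsts) {
  for (const InstrModel *I : Users)
    if (!NoReturnInsts.count(I))
      return true;  // a real use survives; keep the register marked used
  return false;     // only noreturn-call uses: safe to ignore
}

int main() {
  InstrModel Call{true}, Add{false};
  std::set<const InstrModel *> NoRet{&Call};
  std::cout << physRegIsUsed({&Call}, NoRet) << '\n';       // 0: ignorable
  std::cout << physRegIsUsed({&Call, &Add}, NoRet) << '\n'; // 1: still used
}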