diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen')
129 files changed, 14314 insertions, 10275 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index c50f8b5..2ee7767 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -247,8 +247,8 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) || IsImplicitDefUse(MI, MO)) { const unsigned Reg = MO.getReg(); - PassthruRegs.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) PassthruRegs.insert(*SubRegs); } } @@ -782,7 +782,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( if (MI == CriticalPathMI) { CriticalPathSU = CriticalPathStep(CriticalPathSU); CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0; - } else { + } else if (CriticalPathSet.any()) { ExcludeRegs = &CriticalPathSet; } diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 4731af5..1600c67 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -202,161 +202,272 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { } static bool isNoopBitcast(Type *T1, Type *T2, - const TargetLowering& TLI) { + const TargetLoweringBase& TLI) { return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) || (isa<VectorType>(T1) && isa<VectorType>(T2) && TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2))); } -/// sameNoopInput - Return true if V1 == V2, else if either V1 or V2 is a noop -/// (i.e., lowers to no machine code), look through it (and any transitive noop -/// operands to it) and check if it has the same noop input value. This is -/// used to determine if a tail call can be formed. -static bool sameNoopInput(const Value *V1, const Value *V2, - SmallVectorImpl<unsigned> &Els1, - SmallVectorImpl<unsigned> &Els2, - const TargetLowering &TLI) { - using std::swap; - bool swapParity = false; - bool equalEls = Els1 == Els2; +/// Look through operations that will be free to find the earliest source of +/// this value. +/// +/// @param ValLoc If V has aggegate type, we will be interested in a particular +/// scalar component. This records its address; the reverse of this list gives a +/// sequence of indices appropriate for an extractvalue to locate the important +/// value. This value is updated during the function and on exit will indicate +/// similar information for the Value returned. +/// +/// @param DataBits If this function looks through truncate instructions, this +/// will record the smallest size attained. +static const Value *getNoopInput(const Value *V, + SmallVectorImpl<unsigned> &ValLoc, + unsigned &DataBits, + const TargetLoweringBase &TLI) { while (true) { - if ((equalEls && V1 == V2) || isa<UndefValue>(V1) || isa<UndefValue>(V2)) { - if (swapParity) - // Revert to original Els1 and Els2 to avoid confusing recursive calls - swap(Els1, Els2); - return true; - } - // Try to look through V1; if V1 is not an instruction, it can't be looked // through. - const Instruction *I = dyn_cast<Instruction>(V1); + const Instruction *I = dyn_cast<Instruction>(V); + if (!I || I->getNumOperands() == 0) return V; const Value *NoopInput = 0; - if (I != 0 && I->getNumOperands() > 0) { - Value *Op = I->getOperand(0); - if (isa<TruncInst>(I)) { - // Look through truly no-op truncates. - if (TLI.isTruncateFree(Op->getType(), I->getType())) - NoopInput = Op; - } else if (isa<BitCastInst>(I)) { - // Look through truly no-op bitcasts. - if (isNoopBitcast(Op->getType(), I->getType(), TLI)) - NoopInput = Op; - } else if (isa<GetElementPtrInst>(I)) { - // Look through getelementptr - if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) - NoopInput = Op; - } else if (isa<IntToPtrInst>(I)) { - // Look through inttoptr. - // Make sure this isn't a truncating or extending cast. We could - // support this eventually, but don't bother for now. - if (!isa<VectorType>(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(Op->getType())->getBitWidth()) - NoopInput = Op; - } else if (isa<PtrToIntInst>(I)) { - // Look through ptrtoint. - // Make sure this isn't a truncating or extending cast. We could - // support this eventually, but don't bother for now. - if (!isa<VectorType>(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(I->getType())->getBitWidth()) - NoopInput = Op; - } else if (isa<CallInst>(I)) { - // Look through call - for (User::const_op_iterator i = I->op_begin(), - // Skip Callee - e = I->op_end() - 1; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } + + Value *Op = I->getOperand(0); + if (isa<BitCastInst>(I)) { + // Look through truly no-op bitcasts. + if (isNoopBitcast(Op->getType(), I->getType(), TLI)) + NoopInput = Op; + } else if (isa<GetElementPtrInst>(I)) { + // Look through getelementptr + if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) + NoopInput = Op; + } else if (isa<IntToPtrInst>(I)) { + // Look through inttoptr. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast<IntegerType>(Op->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<PtrToIntInst>(I)) { + // Look through ptrtoint. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast<IntegerType>(I->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<TruncInst>(I) && + TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { + DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); + NoopInput = Op; + } else if (isa<CallInst>(I)) { + // Look through call (skipping callee) + for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; } - } else if (isa<InvokeInst>(I)) { - // Look through invoke - for (User::const_op_iterator i = I->op_begin(), - // Skip BB, BB, Callee - e = I->op_end() - 3; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } + } + } else if (isa<InvokeInst>(I)) { + // Look through invoke (skipping BB, BB, Callee) + for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; } } + } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) { + // Value may come from either the aggregate or the scalar + ArrayRef<unsigned> InsertLoc = IVI->getIndices(); + if (std::equal(InsertLoc.rbegin(), InsertLoc.rend(), + ValLoc.rbegin())) { + // The type being inserted is a nested sub-type of the aggregate; we + // have to remove those initial indices to get the location we're + // interested in for the operand. + ValLoc.resize(ValLoc.size() - InsertLoc.size()); + NoopInput = IVI->getInsertedValueOperand(); + } else { + // The struct we're inserting into has the value we're interested in, no + // change of address. + NoopInput = Op; + } + } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) { + // The part we're interested in will inevitably be some sub-section of the + // previous aggregate. Combine the two paths to obtain the true address of + // our element. + ArrayRef<unsigned> ExtractLoc = EVI->getIndices(); + std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(), + std::back_inserter(ValLoc)); + NoopInput = Op; } + // Terminate if we couldn't find anything to look through. + if (!NoopInput) + return V; - if (NoopInput) { - V1 = NoopInput; - continue; - } + V = NoopInput; + } +} + +/// Return true if this scalar return value only has bits discarded on its path +/// from the "tail call" to the "ret". This includes the obvious noop +/// instructions handled by getNoopInput above as well as free truncations (or +/// extensions prior to the call). +static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, + SmallVectorImpl<unsigned> &RetIndices, + SmallVectorImpl<unsigned> &CallIndices, + bool AllowDifferingSizes, + const TargetLoweringBase &TLI) { + + // Trace the sub-value needed by the return value as far back up the graph as + // possible, in the hope that it will intersect with the value produced by the + // call. In the simple case with no "returned" attribute, the hope is actually + // that we end up back at the tail call instruction itself. + unsigned BitsRequired = UINT_MAX; + RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI); + + // If this slot in the value returned is undef, it doesn't matter what the + // call puts there, it'll be fine. + if (isa<UndefValue>(RetVal)) + return true; - // If we already swapped, avoid infinite loop - if (swapParity) - break; + // Now do a similar search up through the graph to find where the value + // actually returned by the "tail call" comes from. In the simple case without + // a "returned" attribute, the search will be blocked immediately and the loop + // a Noop. + unsigned BitsProvided = UINT_MAX; + CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI); + + // There's no hope if we can't actually trace them to (the same part of!) the + // same value. + if (CallVal != RetVal || CallIndices != RetIndices) + return false; + + // However, intervening truncates may have made the call non-tail. Make sure + // all the bits that are needed by the "ret" have been provided by the "tail + // call". FIXME: with sufficiently cunning bit-tracking, we could look through + // extensions too. + if (BitsProvided < BitsRequired || + (!AllowDifferingSizes && BitsProvided != BitsRequired)) + return false; - // Otherwise, swap V1<->V2, Els1<->Els2 - swap(V1, V2); - swap(Els1, Els2); - swapParity = !swapParity; + return true; +} + +/// For an aggregate type, determine whether a given index is within bounds or +/// not. +static bool indexReallyValid(CompositeType *T, unsigned Idx) { + if (ArrayType *AT = dyn_cast<ArrayType>(T)) + return Idx < AT->getNumElements(); + + return Idx < cast<StructType>(T)->getNumElements(); +} + +/// Move the given iterators to the next leaf type in depth first traversal. +/// +/// Performs a depth-first traversal of the type as specified by its arguments, +/// stopping at the next leaf node (which may be a legitimate scalar type or an +/// empty struct or array). +/// +/// @param SubTypes List of the partial components making up the type from +/// outermost to innermost non-empty aggregate. The element currently +/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1). +/// +/// @param Path Set of extractvalue indices leading from the outermost type +/// (SubTypes[0]) to the leaf node currently represented. +/// +/// @returns true if a new type was found, false otherwise. Calling this +/// function again on a finished iterator will repeatedly return +/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty +/// aggregate or a non-aggregate +static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + // First march back up the tree until we can successfully increment one of the + // coordinates in Path. + while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) { + Path.pop_back(); + SubTypes.pop_back(); } - for (unsigned n = 0; n < 2; ++n) { - if (isa<InsertValueInst>(V1)) { - if (isa<StructType>(V1->getType())) { - // Look through insertvalue - unsigned i, e; - for (i = 0, e = cast<StructType>(V1->getType())->getNumElements(); - i != e; ++i) { - const Value *InScalar = FindInsertedValue(const_cast<Value*>(V1), i); - if (InScalar == 0) - break; - Els1.push_back(i); - if (!sameNoopInput(InScalar, V2, Els1, Els2, TLI)) { - Els1.pop_back(); - break; - } - Els1.pop_back(); - } - if (i == e) { - if (swapParity) - swap(Els1, Els2); - return true; - } - } - } else if (!Els1.empty() && isa<ExtractValueInst>(V1)) { - const ExtractValueInst *EVI = cast<ExtractValueInst>(V1); - unsigned i = Els1.back(); - // If the scalar value being inserted is an extractvalue of the right - // index from the call, then everything is good. - if (isa<StructType>(EVI->getOperand(0)->getType()) && - EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) { - // Look through extractvalue - Els1.pop_back(); - if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) { - Els1.push_back(i); - if (swapParity) - swap(Els1, Els2); - return true; - } - Els1.push_back(i); - } - } + // If we reached the top, then the iterator is done. + if (Path.empty()) + return false; - swap(V1, V2); - swap(Els1, Els2); - swapParity = !swapParity; + // We know there's *some* valid leaf now, so march back down the tree picking + // out the left-most element at each node. + ++Path.back(); + Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back()); + while (DeeperType->isAggregateType()) { + CompositeType *CT = cast<CompositeType>(DeeperType); + if (!indexReallyValid(CT, 0)) + return true; + + SubTypes.push_back(CT); + Path.push_back(0); + + DeeperType = CT->getTypeAtIndex(0U); } - if (swapParity) - swap(Els1, Els2); - return false; + return true; } +/// Find the first non-empty, scalar-like type in Next and setup the iterator +/// components. +/// +/// Assuming Next is an aggregate of some kind, this function will traverse the +/// tree from left to right (i.e. depth-first) looking for the first +/// non-aggregate type which will play a role in function return. +/// +/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup +/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first +/// i32 in that type. +static bool firstRealType(Type *Next, + SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + // First initialise the iterator components to the first "leaf" node + // (i.e. node with no valid sub-type at any index, so {} does count as a leaf + // despite nominally being an aggregate). + while (Next->isAggregateType() && + indexReallyValid(cast<CompositeType>(Next), 0)) { + SubTypes.push_back(cast<CompositeType>(Next)); + Path.push_back(0); + Next = cast<CompositeType>(Next)->getTypeAtIndex(0U); + } + + // If there's no Path now, Next was originally scalar already (or empty + // leaf). We're done. + if (Path.empty()) + return true; + + // Otherwise, use normal iteration to keep looking through the tree until we + // find a non-aggregate type. + while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) { + if (!advanceToNextLeafType(SubTypes, Path)) + return false; + } + + return true; +} + +/// Set the iterator data-structures to the next non-empty, non-aggregate +/// subtype. +static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + do { + if (!advanceToNextLeafType(SubTypes, Path)) + return false; + + assert(!Path.empty() && "found a leaf but didn't set the path?"); + } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()); + + return true; +} + + /// Test if the given instruction is in a position to be optimized /// with a tail-call. This roughly means that it's in a block with /// a return and there's nothing that needs to be scheduled @@ -399,6 +510,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, return false; } + return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, TLI); +} + +bool llvm::returnTypeIsEligibleForTailCall(const Function *F, + const Instruction *I, + const ReturnInst *Ret, + const TargetLoweringBase &TLI) { // If the block ends with a void return or unreachable, it doesn't matter // what the call's return type is. if (!Ret || Ret->getNumOperands() == 0) return true; @@ -407,22 +525,85 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, // return type is. if (isa<UndefValue>(Ret->getOperand(0))) return true; - // Conservatively require the attributes of the call to match those of - // the return. Ignore noalias because it doesn't affect the call sequence. - const Function *F = ExitBB->getParent(); - AttributeSet CallerAttrs = F->getAttributes(); - if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). - removeAttribute(Attribute::NoAlias) != - AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). - removeAttribute(Attribute::NoAlias)) - return false; + // Make sure the attributes attached to each return are compatible. + AttrBuilder CallerAttrs(F->getAttributes(), + AttributeSet::ReturnIndex); + AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(), + AttributeSet::ReturnIndex); + + // Noalias is completely benign as far as calling convention goes, it + // shouldn't affect whether the call is a tail call. + CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias); + CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias); + + bool AllowDifferingSizes = true; + if (CallerAttrs.contains(Attribute::ZExt)) { + if (!CalleeAttrs.contains(Attribute::ZExt)) + return false; - // It's not safe to eliminate the sign / zero extension of the return value. - if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || - CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + AllowDifferingSizes = false; + CallerAttrs.removeAttribute(Attribute::ZExt); + CalleeAttrs.removeAttribute(Attribute::ZExt); + } else if (CallerAttrs.contains(Attribute::SExt)) { + if (!CalleeAttrs.contains(Attribute::SExt)) + return false; + + AllowDifferingSizes = false; + CallerAttrs.removeAttribute(Attribute::SExt); + CalleeAttrs.removeAttribute(Attribute::SExt); + } + + // If they're still different, there's some facet we don't understand + // (currently only "inreg", but in future who knows). It may be OK but the + // only safe option is to reject the tail call. + if (CallerAttrs != CalleeAttrs) return false; - // Otherwise, make sure the return value and I have the same value - SmallVector<unsigned, 4> Els1, Els2; - return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI); + const Value *RetVal = Ret->getOperand(0), *CallVal = I; + SmallVector<unsigned, 4> RetPath, CallPath; + SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes; + + bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath); + bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath); + + // Nothing's actually returned, it doesn't matter what the callee put there + // it's a valid tail call. + if (RetEmpty) + return true; + + // Iterate pairwise through each of the value types making up the tail call + // and the corresponding return. For each one we want to know whether it's + // essentially going directly from the tail call to the ret, via operations + // that end up not generating any code. + // + // We allow a certain amount of covariance here. For example it's permitted + // for the tail call to define more bits than the ret actually cares about + // (e.g. via a truncate). + do { + if (CallEmpty) { + // We've exhausted the values produced by the tail call instruction, the + // rest are essentially undef. The type doesn't really matter, but we need + // *something*. + Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back()); + CallVal = UndefValue::get(SlotType); + } + + // The manipulations performed when we're looking through an insertvalue or + // an extractvalue would happen at the front of the RetPath list, so since + // we have to copy it anyway it's more efficient to create a reversed copy. + using std::copy; + SmallVector<unsigned, 4> TmpRetPath, TmpCallPath; + copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath)); + copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath)); + + // Finally, we can check whether the value produced by the tail call at this + // index is compatible with the value we return. + if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, + AllowDifferingSizes, TLI)) + return false; + + CallEmpty = !nextRealType(CallSubTypes, CallPath); + } while(nextRealType(RetSubTypes, RetPath)); + + return true; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 188047d..5d82dd9 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -47,13 +47,18 @@ ARMException::ARMException(AsmPrinter *A) ARMException::~ARMException() {} +ARMTargetStreamer &ARMException::getTargetStreamer() { + MCTargetStreamer &TS = Asm->OutStreamer.getTargetStreamer(); + return static_cast<ARMTargetStreamer &>(TS); +} + void ARMException::EndModule() { } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void ARMException::BeginFunction(const MachineFunction *MF) { - Asm->OutStreamer.EmitFnStart(); + getTargetStreamer().emitFnStart(); if (Asm->MF->getFunction()->needsUnwindTableEntry()) Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); @@ -62,8 +67,9 @@ void ARMException::BeginFunction(const MachineFunction *MF) { /// EndFunction - Gather and emit post-function exception information. /// void ARMException::EndFunction() { + ARMTargetStreamer &ATS = getTargetStreamer(); if (!Asm->MF->getFunction()->needsUnwindTableEntry()) - Asm->OutStreamer.EmitCantUnwind(); + ATS.emitCantUnwind(); else { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); @@ -76,13 +82,13 @@ void ARMException::EndFunction() { // Emit references to personality. if (const Function * Personality = MMI->getPersonalities()[MMI->getPersonalityIndex()]) { - MCSymbol *PerSym = Asm->Mang->getSymbol(Personality); + MCSymbol *PerSym = Asm->getSymbol(Personality); Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); - Asm->OutStreamer.EmitPersonality(PerSym); + ATS.emitPersonality(PerSym); } // Emit .handlerdata directive. - Asm->OutStreamer.EmitHandlerData(); + ATS.emitHandlerData(); // Emit actual exception table EmitExceptionTable(); @@ -90,7 +96,7 @@ void ARMException::EndFunction() { } } - Asm->OutStreamer.EmitFnEnd(); + ATS.emitFnEnd(); } void ARMException::EmitTypeInfos(unsigned TTypeEncoding) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 84162ac..308b0e0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -42,16 +42,18 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; -static const char *DWARFGroupName = "DWARF Emission"; -static const char *DbgTimerName = "DWARF Debug Writer"; -static const char *EHTimerName = "DWARF Exception Writer"; +static const char *const DWARFGroupName = "DWARF Emission"; +static const char *const DbgTimerName = "DWARF Debug Writer"; +static const char *const EHTimerName = "DWARF Exception Writer"; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -93,11 +95,11 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) : MachineFunctionPass(ID), - TM(tm), MAI(tm.getMCAsmInfo()), + TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()), OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { - DD = 0; DE = 0; MMI = 0; LI = 0; + DD = 0; DE = 0; MMI = 0; LI = 0; MF = 0; CurrentFnSym = CurrentFnSymForSize = 0; GCMetadataPrinters = 0; VerboseAsm = Streamer.isVerboseAsm(); @@ -154,8 +156,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { } bool AsmPrinter::doInitialization(Module &M) { - OutStreamer.InitStreamer(); - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); MMI->AnalyzeModule(M); @@ -163,7 +163,9 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); - Mang = new Mangler(OutContext, *TM.getDataLayout()); + OutStreamer.InitStreamer(); + + Mang = new Mangler(&TM); // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); @@ -211,12 +213,12 @@ bool AsmPrinter::doInitialization(Module &M) { llvm_unreachable("Unknown exception type."); } -void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { - switch ((GlobalValue::LinkageTypes)Linkage) { +void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { + GlobalValue::LinkageTypes Linkage = GV->getLinkage(); + switch (Linkage) { case GlobalValue::CommonLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: @@ -224,8 +226,19 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - if ((GlobalValue::LinkageTypes)Linkage != - GlobalValue::LinkOnceODRAutoHideLinkage) + bool CanBeHidden = false; + + if (Linkage == GlobalValue::LinkOnceODRLinkage) { + if (GV->hasUnnamedAddr()) { + CanBeHidden = true; + } else { + GlobalStatus GS; + if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared) + CanBeHidden = true; + } + } + + if (!CanBeHidden) // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else @@ -238,7 +251,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { // .weak _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); } - break; + return; case GlobalValue::DLLExportLinkage: case GlobalValue::AppendingLinkage: // FIXME: appending linkage variables should go into a section of @@ -247,16 +260,23 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { // If external or appending, declare as a global symbol. // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - break; + return; case GlobalValue::PrivateLinkage: case GlobalValue::InternalLinkage: case GlobalValue::LinkerPrivateLinkage: - break; - default: - llvm_unreachable("Unknown linkage type!"); + return; + case GlobalValue::AvailableExternallyLinkage: + llvm_unreachable("Should never emit this"); + case GlobalValue::DLLImportLinkage: + case GlobalValue::ExternalWeakLinkage: + llvm_unreachable("Don't know how to emit these"); } + llvm_unreachable("Unknown linkage type!"); } +MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { + return getObjFileLowering().getSymbol(*Mang, GV); +} /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { @@ -272,7 +292,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } } - MCSymbol *GVSym = Mang->getSymbol(GV); + MCSymbol *GVSym = getSymbol(GV); EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); if (!GV->hasInitializer()) // External globals require no extra code. @@ -283,13 +303,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const DataLayout *TD = TM.getDataLayout(); - uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + const DataLayout *DL = TM.getDataLayout(); + uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. Overaligning a global // with a specified alignment is a prompt way to break globals emitted to // sections and expected to be contiguous (e.g. ObjC metadata). - unsigned AlignLog = getGVAlignmentLog2(GV, *TD); + unsigned AlignLog = getGVAlignmentLog2(GV, *DL); + + if (DD) + DD->setSymbolSize(GVSym, Size); // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { @@ -367,9 +390,10 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { MCSymbol *MangSym = OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); - if (GVKind.isThreadBSS()) + if (GVKind.isThreadBSS()) { + TheSection = getObjFileLowering().getTLSBSSSection(); OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); - else if (GVKind.isThreadData()) { + } else if (GVKind.isThreadData()) { OutStreamer.SwitchSection(TheSection); EmitAlignment(AlignLog, GV); @@ -386,16 +410,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.SwitchSection(TLVSect); // Emit the linkage here. - EmitLinkage(GV->getLinkage(), GVSym); + EmitLinkage(GV, GVSym); OutStreamer.EmitLabel(GVSym); // Three pointers in size: // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer - unsigned PtrSize = TD->getPointerSizeInBits()/8; + unsigned PtrSize = DL->getPointerTypeSize(GV->getType()); OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), - PtrSize); + PtrSize); OutStreamer.EmitIntValue(0, PtrSize); OutStreamer.EmitSymbolValue(MangSym, PtrSize); @@ -405,7 +429,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.SwitchSection(TheSection); - EmitLinkage(GV->getLinkage(), GVSym); + EmitLinkage(GV, GVSym); EmitAlignment(AlignLog, GV); OutStreamer.EmitLabel(GVSym); @@ -431,7 +455,7 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); - EmitLinkage(F->getLinkage(), CurrentFnSym); + EmitLinkage(F, CurrentFnSym); EmitAlignment(MF->getAlignment(), F); if (MAI->hasDotTypeDotSizeDirective()) @@ -457,16 +481,6 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.EmitLabel(DeadBlockSyms[i]); } - // Add some workaround for linkonce linkage on Cygwin\MinGW. - if (MAI->getLinkOnceDirective() != 0 && - (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) { - // FIXME: What is this? - MCSymbol *FakeStub = - OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+ - CurrentFnSym->getName()); - OutStreamer.EmitLabel(FakeStub); - } - // Emit pre-function debug and/or EH information. if (DE) { NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -476,6 +490,10 @@ void AsmPrinter::EmitFunctionHeader() { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->beginFunction(MF); } + + // Emit the prefix data. + if (F->hasPrefixData()) + EmitGlobalConstant(F->getPrefixData()); } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -528,11 +546,11 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// emitImplicitDef - This method emits the specified machine instruction /// that is an implicit def. -static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) { +void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - AP.OutStreamer.AddComment(Twine("implicit-def: ") + - AP.TM.getRegisterInfo()->getName(RegNo)); - AP.OutStreamer.AddBlankLine(); + OutStreamer.AddComment(Twine("implicit-def: ") + + TM.getRegisterInfo()->getName(RegNo)); + OutStreamer.AddBlankLine(); } static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { @@ -562,10 +580,17 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // cast away const; DIetc do not take const operands for some reason. DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); - if (V.getContext().isSubprogram()) - OS << DISubprogram(V.getContext()).getDisplayName() << ":"; + if (V.getContext().isSubprogram()) { + StringRef Name = DISubprogram(V.getContext()).getDisplayName(); + if (!Name.empty()) + OS << Name << ":"; + } OS << V.getName() << " <- "; + // The second operand is only an offset if it's an immediate. + bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); + int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; + // Register or immediate value. Register 0 means undef. if (MI->getOperand(0).isFPImm()) { APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); @@ -586,18 +611,31 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } else if (MI->getOperand(0).isCImm()) { MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); } else { - assert(MI->getOperand(0).isReg() && "Unknown operand type"); - if (MI->getOperand(0).getReg() == 0) { + unsigned Reg; + if (MI->getOperand(0).isReg()) { + Reg = MI->getOperand(0).getReg(); + } else { + assert(MI->getOperand(0).isFI() && "Unknown operand type"); + const TargetFrameLowering *TFI = AP.TM.getFrameLowering(); + Offset += TFI->getFrameIndexReference(*AP.MF, + MI->getOperand(0).getIndex(), Reg); + Deref = true; + } + if (Reg == 0) { // Suppress offset, it is not meaningful here. OS << "undef"; // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer.EmitRawText(OS.str()); return true; } - OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); + if (Deref) + OS << '['; + OS << AP.TM.getRegisterInfo()->getName(Reg); } - OS << '+' << MI->getOperand(1).getImm(); + if (Deref) + OS << '+' << Offset << ']'; + // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer.EmitRawText(OS.str()); return true; @@ -624,7 +662,7 @@ bool AsmPrinter::needsRelocationsForDwarfStringPool() const { } void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { - MCSymbol *Label = MI.getOperand(0).getMCSymbol(); + const MCSymbol *Label = MI.getOperand(0).getMCSymbol(); if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) return; @@ -635,14 +673,14 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { if (MMI->getCompactUnwindEncoding() != 0) OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding()); - MachineModuleInfo &MMI = MF->getMMI(); - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + const MachineModuleInfo &MMI = MF->getMMI(); + const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions(); bool FoundOne = false; (void)FoundOne; - for (std::vector<MachineMove>::iterator I = Moves.begin(), - E = Moves.end(); I != E; ++I) { + for (std::vector<MCCFIInstruction>::const_iterator I = Instrs.begin(), + E = Instrs.end(); I != E; ++I) { if (I->getLabel() == Label) { - EmitCFIFrameMove(*I); + emitCFIInstruction(*I); FoundOne = true; } } @@ -702,7 +740,7 @@ void AsmPrinter::EmitFunctionBody() { } break; case TargetOpcode::IMPLICIT_DEF: - if (isVerbose()) emitImplicitDef(II, *this); + if (isVerbose()) emitImplicitDef(II); break; case TargetOpcode::KILL: if (isVerbose()) emitKill(II, *this); @@ -790,16 +828,9 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.AddBlankLine(); } -/// getDebugValueLocation - Get location information encoded by DBG_VALUE -/// operands. -MachineLocation AsmPrinter:: -getDebugValueLocation(const MachineInstr *MI) const { - // Target specific DBG_VALUE instructions are handled by each target. - return MachineLocation(); -} - /// EmitDwarfRegOp - Emit dwarf register operation. -void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { +void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc, + bool Indirect) const { const TargetRegisterInfo *TRI = TM.getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); @@ -817,7 +848,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { // caller might be in the middle of an dwarf expression. We should // probably assert that Reg >= 0 once debug info generation is more mature. - if (MLoc.isIndirect()) { + if (MLoc.isIndirect() || Indirect) { if (Reg < 32) { OutStreamer.AddComment( dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); @@ -828,7 +859,9 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { OutStreamer.AddComment(Twine(Reg)); EmitULEB128(Reg); } - EmitSLEB128(MLoc.getOffset()); + EmitSLEB128(!MLoc.isIndirect() ? 0 : MLoc.getOffset()); + if (MLoc.isIndirect() && Indirect) + EmitInt8(dwarf::DW_OP_deref); } else { if (Reg < 32) { OutStreamer.AddComment( @@ -860,7 +893,7 @@ bool AsmPrinter::doFinalization(Module &M) { if (V == GlobalValue::DefaultVisibility) continue; - MCSymbol *Name = Mang->getSymbol(&F); + MCSymbol *Name = getSymbol(&F); EmitVisibility(Name, V, false); } @@ -870,6 +903,9 @@ bool AsmPrinter::doFinalization(Module &M) { if (!ModuleFlags.empty()) getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM); + // Make sure we wrote out everything we need. + OutStreamer.Flush(); + // Finalize debug and EH information. if (DE) { { @@ -897,12 +933,12 @@ bool AsmPrinter::doFinalization(Module &M) { for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { if (!I->hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); + OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference); } for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { if (!I->hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); + OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference); } } @@ -910,14 +946,19 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer.AddBlankLine(); for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { - MCSymbol *Name = Mang->getSymbol(I); + MCSymbol *Name = getSymbol(I); const GlobalValue *GV = I->getAliasedGlobal(); - MCSymbol *Target = Mang->getSymbol(GV); + if (GV->isDeclaration()) { + report_fatal_error(Name->getName() + + ": Target doesn't support aliases to declarations"); + } + + MCSymbol *Target = getSymbol(GV); if (I->hasExternalLinkage() || !MAI->getWeakRefDirective()) OutStreamer.EmitSymbolAttribute(Name, MCSA_Global); - else if (I->hasWeakLinkage()) + else if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference); else assert(I->hasLocalLinkage() && "Invalid alias linkage"); @@ -936,6 +977,9 @@ bool AsmPrinter::doFinalization(Module &M) { if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I)) MP->finishAssembly(*this); + // Emit llvm.ident metadata in an '.ident' directive. + EmitModuleIdents(M); + // If we don't have any trampolines, then we don't require stack memory // to be executable. Some targets have a directive to declare this. Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); @@ -959,7 +1003,7 @@ bool AsmPrinter::doFinalization(Module &M) { void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; // Get the function symbol. - CurrentFnSym = Mang->getSymbol(MF.getFunction()); + CurrentFnSym = getSymbol(MF.getFunction()); CurrentFnSymForSize = CurrentFnSym; if (isVerbose()) @@ -1266,16 +1310,10 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { const GlobalValue *GV = dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang)) - OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip); + OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); } } -typedef std::pair<unsigned, Constant*> Structor; - -static bool priority_order(const Structor& lhs, const Structor& rhs) { - return lhs.first < rhs.first; -} - /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { @@ -1292,6 +1330,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr). // Gather the structors in a form that's convenient for sorting by priority. + typedef std::pair<unsigned, Constant *> Structor; SmallVector<Structor, 8> Structors; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i)); @@ -1305,9 +1344,9 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const DataLayout *TD = TM.getDataLayout(); - unsigned Align = Log2_32(TD->getPointerPrefAlignment()); - std::stable_sort(Structors.begin(), Structors.end(), priority_order); + const DataLayout *DL = TM.getDataLayout(); + unsigned Align = Log2_32(DL->getPointerPrefAlignment()); + std::stable_sort(Structors.begin(), Structors.end(), less_first()); for (unsigned i = 0, e = Structors.size(); i != e; ++i) { const MCSection *OutputSection = (isCtor ? @@ -1320,6 +1359,21 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } } +void AsmPrinter::EmitModuleIdents(Module &M) { + if (!MAI->hasIdentDirective()) + return; + + if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + const MDNode *N = NMD->getOperand(i); + assert(N->getNumOperands() == 1 && + "llvm.ident metadata entry can have only one operand"); + const MDString *S = cast<MDString>(N->getOperand(0)); + OutStreamer.EmitIdent(S->getString()); + } + } +} + //===--------------------------------------------------------------------===// // Emission and print routines // @@ -1385,12 +1439,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, OutContext); if (!MAI->hasSetDirective()) - OutStreamer.EmitValue(Diff, 4); + OutStreamer.EmitValue(Diff, Size); else { // Otherwise, emit with .set (aka assignment). MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); OutStreamer.EmitAssignment(SetLabel, Diff); - OutStreamer.EmitSymbolValue(SetLabel, 4); + OutStreamer.EmitSymbolValue(SetLabel, Size); } } @@ -1398,8 +1452,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, /// where the size in bytes of the directive is specified by Size and Label /// specifies the label. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, - unsigned Size) + unsigned Size, bool IsSectionRelative) const { + if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { + OutStreamer.EmitCOFFSecRel32(Label); + return; + } // Emit Label+Offset (or just Label if Offset is zero) const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext); @@ -1447,7 +1505,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { return MCConstantExpr::Create(CI->getZExtValue(), Ctx); if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) - return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); + return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx); if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); @@ -1477,10 +1535,10 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); // Generate a symbolic expression for the byte address - APInt OffsetAI(TD.getPointerSizeInBits(), 0); - cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); + APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); + cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); const MCExpr *Base = lowerConstant(CE->getOperand(0), AP); if (!OffsetAI) @@ -1501,17 +1559,17 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { return lowerConstant(CE->getOperand(0), AP); case Instruction::IntToPtr: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), + Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()), false/*ZExt*/); return lowerConstant(Op, AP); } case Instruction::PtrToInt: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. Constant *Op = CE->getOperand(0); @@ -1521,13 +1579,13 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { // We can emit the pointer value into this slot if the slot is an // integer slot equal to the size of the pointer. - if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) + if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType())) return OpExpr; // Otherwise the pointer is smaller than the resultant integer, mask off // the high bits so we are sure to get a proper truncation if the input is // a constant expr. - unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); + unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } @@ -1561,8 +1619,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { } } -static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, - AsmPrinter &AP); +static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP); /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the @@ -1624,7 +1681,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { } static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, - unsigned AddrSpace,AsmPrinter &AP){ + AsmPrinter &AP){ // See if we can aggregate this into a .fill, if so, emit it as such. int Value = isRepeatedByteSequence(CDS, AP.TM); @@ -1632,12 +1689,12 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) - return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + return AP.OutStreamer.EmitFill(Bytes, Value); } // If this can be emitted with .ascii/.asciz, emit it as such. if (CDS->isString()) - return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace); + return AP.OutStreamer.EmitBytes(CDS->getAsString()); // Otherwise, emit the values in successive locations. unsigned ElementByteSize = CDS->getElementByteSize(); @@ -1647,7 +1704,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i)); AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), - ElementByteSize, AddrSpace); + ElementByteSize); } } else if (ElementByteSize == 4) { // FP Constants are printed as integer constants to avoid losing @@ -1662,7 +1719,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsFloat(i); if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 4, AddrSpace); + AP.OutStreamer.EmitIntValue(I, 4); } } else { assert(CDS->getElementType()->isDoubleTy()); @@ -1675,78 +1732,74 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsDouble(i); if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 8, AddrSpace); + AP.OutStreamer.EmitIntValue(I, 8); } } - const DataLayout &TD = *AP.TM.getDataLayout(); - unsigned Size = TD.getTypeAllocSize(CDS->getType()); - unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) * + const DataLayout &DL = *AP.TM.getDataLayout(); + unsigned Size = DL.getTypeAllocSize(CDS->getType()); + unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding, AddrSpace); + AP.OutStreamer.EmitZeros(Padding); } -static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, - AsmPrinter &AP) { +static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. int Value = isRepeatedByteSequence(CA, AP.TM); if (Value != -1) { uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType()); - AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + AP.OutStreamer.EmitFill(Bytes, Value); } else { for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CA->getOperand(i), AP); } } -static void emitGlobalConstantVector(const ConstantVector *CV, - unsigned AddrSpace, AsmPrinter &AP) { +static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CV->getOperand(i), AP); - const DataLayout &TD = *AP.TM.getDataLayout(); - unsigned Size = TD.getTypeAllocSize(CV->getType()); - unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) * + const DataLayout &DL = *AP.TM.getDataLayout(); + unsigned Size = DL.getTypeAllocSize(CV->getType()); + unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding, AddrSpace); + AP.OutStreamer.EmitZeros(Padding); } -static void emitGlobalConstantStruct(const ConstantStruct *CS, - unsigned AddrSpace, AsmPrinter &AP) { +static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) { // Print the fields in successive locations. Pad to align if needed! - const DataLayout *TD = AP.TM.getDataLayout(); - unsigned Size = TD->getTypeAllocSize(CS->getType()); - const StructLayout *Layout = TD->getStructLayout(CS->getType()); + const DataLayout *DL = AP.TM.getDataLayout(); + unsigned Size = DL->getTypeAllocSize(CS->getType()); + const StructLayout *Layout = DL->getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { const Constant *Field = CS->getOperand(i); // Check if padding is needed and insert one or more 0s. - uint64_t FieldSize = TD->getTypeAllocSize(Field->getType()); + uint64_t FieldSize = DL->getTypeAllocSize(Field->getType()); uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) - Layout->getElementOffset(i)) - FieldSize; SizeSoFar += FieldSize + PadSize; // Now print the actual field value. - emitGlobalConstantImpl(Field, AddrSpace, AP); + emitGlobalConstantImpl(Field, AP); // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. - AP.OutStreamer.EmitZeros(PadSize, AddrSpace); + AP.OutStreamer.EmitZeros(PadSize); } assert(SizeSoFar == Layout->getSizeInBytes() && "Layout of constant struct may be incorrect!"); } -static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, - AsmPrinter &AP) { +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { APInt API = CFP->getValueAPF().bitcastToAPInt(); // First print a comment with what we think the original floating-point value @@ -1772,47 +1825,86 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, int Chunk = API.getNumWords() - 1; if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes); for (; Chunk >= 0; --Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); } else { unsigned Chunk; for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes); } // Emit the tail padding for the long double. - const DataLayout &TD = *AP.TM.getDataLayout(); - AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - - TD.getTypeStoreSize(CFP->getType()), AddrSpace); + const DataLayout &DL = *AP.TM.getDataLayout(); + AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) - + DL.getTypeStoreSize(CFP->getType())); } -static void emitGlobalConstantLargeInt(const ConstantInt *CI, - unsigned AddrSpace, AsmPrinter &AP) { - const DataLayout *TD = AP.TM.getDataLayout(); +static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { + const DataLayout *DL = AP.TM.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); - assert((BitWidth & 63) == 0 && "only support multiples of 64-bits"); + + // Copy the value as we may massage the layout for constants whose bit width + // is not a multiple of 64-bits. + APInt Realigned(CI->getValue()); + uint64_t ExtraBits = 0; + unsigned ExtraBitsSize = BitWidth & 63; + + if (ExtraBitsSize) { + // The bit width of the data is not a multiple of 64-bits. + // The extra bits are expected to be at the end of the chunk of the memory. + // Little endian: + // * Nothing to be done, just record the extra bits to emit. + // Big endian: + // * Record the extra bits to emit. + // * Realign the raw data to emit the chunks of 64-bits. + if (DL->isBigEndian()) { + // Basically the structure of the raw data is a chunk of 64-bits cells: + // 0 1 BitWidth / 64 + // [chunk1][chunk2] ... [chunkN]. + // The most significant chunk is chunkN and it should be emitted first. + // However, due to the alignment issue chunkN contains useless bits. + // Realign the chunks so that they contain only useless information: + // ExtraBits 0 1 (BitWidth / 64) - 1 + // chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN] + ExtraBits = Realigned.getRawData()[0] & + (((uint64_t)-1) >> (64 - ExtraBitsSize)); + Realigned = Realigned.lshr(ExtraBitsSize); + } else + ExtraBits = Realigned.getRawData()[BitWidth / 64]; + } // We don't expect assemblers to support integer data directives // for more than 64 bits, so we emit the data in at most 64-bit // quantities at a time. - const uint64_t *RawData = CI->getValue().getRawData(); + const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { - uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i]; - AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i]; + AP.OutStreamer.EmitIntValue(Val, 8); + } + + if (ExtraBitsSize) { + // Emit the extra bits after the 64-bits chunks. + + // Emit a directive that fills the expected size. + uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(CI->getType()); + Size -= (BitWidth / 64) * 8; + assert(Size && Size * 8 >= ExtraBitsSize && + (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) + == ExtraBits && "Directive too small for extra bits."); + AP.OutStreamer.EmitIntValue(ExtraBits, Size); } } -static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, - AsmPrinter &AP) { - const DataLayout *TD = AP.TM.getDataLayout(); - uint64_t Size = TD->getTypeAllocSize(CV->getType()); +static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { + const DataLayout *DL = AP.TM.getDataLayout(); + uint64_t Size = DL->getTypeAllocSize(CV->getType()); if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) - return AP.OutStreamer.EmitZeros(Size, AddrSpace); + return AP.OutStreamer.EmitZeros(Size); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { switch (Size) { @@ -1823,64 +1915,64 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", CI->getZExtValue()); - AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); + AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size); return; default: - emitGlobalConstantLargeInt(CI, AddrSpace, AP); + emitGlobalConstantLargeInt(CI, AP); return; } } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) - return emitGlobalConstantFP(CFP, AddrSpace, AP); + return emitGlobalConstantFP(CFP, AP); if (isa<ConstantPointerNull>(CV)) { - AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); + AP.OutStreamer.EmitIntValue(0, Size); return; } if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) - return emitGlobalConstantDataSequential(CDS, AddrSpace, AP); + return emitGlobalConstantDataSequential(CDS, AP); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return emitGlobalConstantArray(CVA, AddrSpace, AP); + return emitGlobalConstantArray(CVA, AP); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return emitGlobalConstantStruct(CVS, AddrSpace, AP); + return emitGlobalConstantStruct(CVS, AP); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of // vectors). if (CE->getOpcode() == Instruction::BitCast) - return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + return emitGlobalConstantImpl(CE->getOperand(0), AP); if (Size > 8) { // If the constant expression's size is greater than 64-bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way. - Constant *New = ConstantFoldConstantExpression(CE, TD); + Constant *New = ConstantFoldConstantExpression(CE, DL); if (New && New != CE) - return emitGlobalConstantImpl(New, AddrSpace, AP); + return emitGlobalConstantImpl(New, AP); } } if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return emitGlobalConstantVector(V, AddrSpace, AP); + return emitGlobalConstantVector(V, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace); + AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. -void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { +void AsmPrinter::EmitGlobalConstant(const Constant *CV) { uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); if (Size) - emitGlobalConstantImpl(CV, AddrSpace, *this); + emitGlobalConstantImpl(CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. - OutStreamer.EmitIntValue(0, 1, AddrSpace); + OutStreamer.EmitIntValue(0, 1); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 31e42d4..b92f49c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -33,7 +33,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// /// EmitSLEB128 - emit the specified signed leb128 value. -void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { +void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); @@ -41,7 +41,7 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { } /// EmitULEB128 - emit the specified signed leb128 value. -void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, +void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); @@ -169,28 +169,27 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, // Dwarf Lowering Routines //===----------------------------------------------------------------------===// -/// EmitCFIFrameMove - Emit a frame instruction. -void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // If advancing cfa. - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - if (Src.getReg() == MachineLocation::VirtualFP) { - OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); - } else { - // Reg + Offset - OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true), - Src.getOffset()); - } - } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); - } else { - assert(!Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), - Dst.getOffset()); +void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { + switch (Inst.getOperation()) { + default: + llvm_unreachable("Unexpected instruction"); + case MCCFIInstruction::OpDefCfaOffset: + OutStreamer.EmitCFIDefCfaOffset(Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfa: + OutStreamer.EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfaRegister: + OutStreamer.EmitCFIDefCfaRegister(Inst.getRegister()); + break; + case MCCFIInstruction::OpOffset: + OutStreamer.EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); + break; + case MCCFIInstruction::OpRegister: + OutStreamer.EmitCFIRegister(Inst.getRegister(), Inst.getRegister2()); + break; + case MCCFIInstruction::OpWindowSave: + OutStreamer.EmitCFIWindowSave(); + break; } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index abfa330..4f927f6 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -123,7 +123,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, TM.getTargetCPU(), TM.getTargetFeatureString())); OwningPtr<MCTargetAsmParser> - TAP(TM.getTarget().createMCAsmParser(*STI, *Parser)); + TAP(TM.getTarget().createMCAsmParser(*STI, *Parser, *MII)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -213,7 +213,7 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } else { unsigned OpFlags = MI->getOperand(OpNo).getImm(); ++OpNo; // Skip over the ID number. - + if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, /*Modifier*/ 0, OS); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 673867a..e39b374 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "DIE.h" +#include "DwarfDebug.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DataLayout.h" @@ -23,6 +24,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MD5.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -32,8 +34,10 @@ using namespace llvm; /// Profile - Used to gather unique data for the abbreviation folding set. /// void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(Attribute); - ID.AddInteger(Form); + // Explicitly cast to an integer type for which FoldingSetNodeID has + // overloads. Otherwise MSVC 2010 thinks this call is ambiguous. + ID.AddInteger(unsigned(Attribute)); + ID.AddInteger(unsigned(Form)); } //===----------------------------------------------------------------------===// @@ -43,7 +47,7 @@ void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { /// Profile - Used to gather unique data for the abbreviation folding set. /// void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(Tag); + ID.AddInteger(unsigned(Tag)); ID.AddInteger(ChildrenFlag); // For each attribute description. @@ -55,11 +59,9 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { /// void DIEAbbrev::Emit(AsmPrinter *AP) const { // Emit its Dwarf tag type. - // FIXME: Doing work even in non-asm-verbose runs. AP->EmitULEB128(Tag, dwarf::TagString(Tag)); // Emit whether it has children DIEs. - // FIXME: Doing work even in non-asm-verbose runs. AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag)); // For each attribute description. @@ -67,12 +69,10 @@ void DIEAbbrev::Emit(AsmPrinter *AP) const { const DIEAbbrevData &AttrData = Data[i]; // Emit attribute type. - // FIXME: Doing work even in non-asm-verbose runs. AP->EmitULEB128(AttrData.getAttribute(), dwarf::AttributeString(AttrData.getAttribute())); // Emit form type. - // FIXME: Doing work even in non-asm-verbose runs. AP->EmitULEB128(AttrData.getForm(), dwarf::FormEncodingString(AttrData.getForm())); } @@ -114,14 +114,34 @@ DIE::~DIE() { /// Climb up the parent chain to get the compile unit DIE to which this DIE /// belongs. -DIE *DIE::getCompileUnit() const { - DIE *p = getParent(); +const DIE *DIE::getCompileUnit() const { + const DIE *Cu = getCompileUnitOrNull(); + assert(Cu && "We should not have orphaned DIEs."); + return Cu; +} + +/// Climb up the parent chain to get the compile unit DIE this DIE belongs +/// to. Return NULL if DIE is not added to an owner yet. +const DIE *DIE::getCompileUnitOrNull() const { + const DIE *p = this; while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit) return p; p = p->getParent(); } - llvm_unreachable("We should not have orphaned DIEs."); + return NULL; +} + +DIEValue *DIE::findAttribute(uint16_t Attribute) { + const SmallVectorImpl<DIEValue *> &Values = getValues(); + const DIEAbbrev &Abbrevs = getAbbrev(); + + // Iterate through all the attributes until we find the one we're + // looking for, if we can't find it return NULL. + for (size_t i = 0; i < Values.size(); ++i) + if (Abbrevs.getData()[i].getAttribute() == Attribute) + return Values[i]; + return NULL; } #ifndef NDEBUG @@ -178,7 +198,7 @@ void DIE::dump() { void DIEValue::anchor() { } #ifndef NDEBUG -void DIEValue::dump() { +void DIEValue::dump() const { print(dbgs()); } #endif @@ -189,14 +209,14 @@ void DIEValue::dump() { /// EmitValue - Emit integer of appropriate size. /// -void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { +void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { unsigned Size = ~0U; switch (Form) { case dwarf::DW_FORM_flag_present: // Emit something to keep the lines and comments in sync. // FIXME: Is there a better way to do this? if (Asm->OutStreamer.hasRawTextSupport()) - Asm->OutStreamer.EmitRawText(StringRef("")); + Asm->OutStreamer.EmitRawText(""); return; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru @@ -221,7 +241,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { /// SizeOf - Determine size of integer value in bytes. /// -unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_flag_present: return 0; case dwarf::DW_FORM_flag: // Fall thru @@ -244,25 +264,54 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { } #ifndef NDEBUG -void DIEInteger::print(raw_ostream &O) { +void DIEInteger::print(raw_ostream &O) const { O << "Int: " << (int64_t)Integer << " 0x"; O.write_hex(Integer); } #endif //===----------------------------------------------------------------------===// +// DIEExpr Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit expression value. +/// +void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { + AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of expression value in bytes. +/// +unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->getDataLayout().getPointerSize(); +} + +#ifndef NDEBUG +void DIEExpr::print(raw_ostream &O) const { + O << "Expr: "; + Expr->print(O); +} +#endif + +//===----------------------------------------------------------------------===// // DIELabel Implementation //===----------------------------------------------------------------------===// /// EmitValue - Emit label value. /// -void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { - AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form)); +void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { + AP->EmitLabelReference(Label, SizeOf(AP, Form), + Form == dwarf::DW_FORM_strp || + Form == dwarf::DW_FORM_sec_offset || + Form == dwarf::DW_FORM_ref_addr); } /// SizeOf - Determine size of label value in bytes. /// -unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -270,7 +319,7 @@ unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { } #ifndef NDEBUG -void DIELabel::print(raw_ostream &O) { +void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); } #endif @@ -281,36 +330,70 @@ void DIELabel::print(raw_ostream &O) { /// EmitValue - Emit delta value. /// -void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { +void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); } /// SizeOf - Determine size of delta value in bytes. /// -unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; return AP->getDataLayout().getPointerSize(); } #ifndef NDEBUG -void DIEDelta::print(raw_ostream &O) { +void DIEDelta::print(raw_ostream &O) const { O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName(); } #endif //===----------------------------------------------------------------------===// +// DIEString Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit string value. +/// +void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { + Access->EmitValue(AP, Form); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { + return Access->SizeOf(AP, Form); +} + +#ifndef NDEBUG +void DIEString::print(raw_ostream &O) const { + O << "String: " << Str << "\tSymbol: "; + Access->print(O); +} +#endif + +//===----------------------------------------------------------------------===// // DIEEntry Implementation //===----------------------------------------------------------------------===// /// EmitValue - Emit debug information entry offset. /// -void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const { +void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->EmitInt32(Entry->getOffset()); } +unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { + // DWARF4: References that use the attribute form DW_FORM_ref_addr are + // specified to be four bytes in the DWARF 32-bit format and eight bytes + // in the DWARF 64-bit format, while DWARF Version 2 specifies that such + // references have the same size as an address on the target system. + if (AP->getDwarfDebug()->getDwarfVersion() == 2) + return AP->getDataLayout().getPointerSize(); + return sizeof(int32_t); +} + #ifndef NDEBUG -void DIEEntry::print(raw_ostream &O) { +void DIEEntry::print(raw_ostream &O) const { O << format("Die: 0x%lx", (long)(intptr_t)Entry); } #endif @@ -333,7 +416,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { /// EmitValue - Emit block data. /// -void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { +void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; @@ -349,7 +432,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { /// SizeOf - Determine size of block data in bytes. /// -unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -360,7 +443,7 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const { } #ifndef NDEBUG -void DIEBlock::print(raw_ostream &O) { +void DIEBlock::print(raw_ostream &O) const { O << "Blk: "; DIE::print(O, 5); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h index 3c06001..f4fa326 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h @@ -18,30 +18,32 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Dwarf.h" +#include "llvm/MC/MCExpr.h" #include <vector> namespace llvm { class AsmPrinter; class MCSymbol; + class MCSymbolRefExpr; class raw_ostream; //===--------------------------------------------------------------------===// - /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a + /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a /// Dwarf abbreviation. class DIEAbbrevData { /// Attribute - Dwarf attribute code. /// - uint16_t Attribute; + dwarf::Attribute Attribute; /// Form - Dwarf form code. /// - uint16_t Form; + dwarf::Form Form; public: - DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {} + DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {} // Accessors. - uint16_t getAttribute() const { return Attribute; } - uint16_t getForm() const { return Form; } + dwarf::Attribute getAttribute() const { return Attribute; } + dwarf::Form getForm() const { return Form; } /// Profile - Used to gather unique data for the abbreviation folding set. /// @@ -54,7 +56,7 @@ namespace llvm { class DIEAbbrev : public FoldingSetNode { /// Tag - Dwarf tag code. /// - uint16_t Tag; + dwarf::Tag Tag; /// ChildrenFlag - Dwarf children flag. /// @@ -69,29 +71,22 @@ namespace llvm { SmallVector<DIEAbbrevData, 12> Data; public: - DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} + DIEAbbrev(dwarf::Tag T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} // Accessors. - uint16_t getTag() const { return Tag; } + dwarf::Tag getTag() const { return Tag; } unsigned getNumber() const { return Number; } uint16_t getChildrenFlag() const { return ChildrenFlag; } const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; } - void setTag(uint16_t T) { Tag = T; } void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } void setNumber(unsigned N) { Number = N; } /// AddAttribute - Adds another set of attribute information to the /// abbreviation. - void AddAttribute(uint16_t Attribute, uint16_t Form) { + void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) { Data.push_back(DIEAbbrevData(Attribute, Form)); } - /// AddFirstAttribute - Adds a set of attribute information to the front - /// of the abbreviation. - void AddFirstAttribute(uint16_t Attribute, uint16_t Form) { - Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form)); - } - /// Profile - Used to gather unique data for the abbreviation folding set. /// void Profile(FoldingSetNodeID &ID) const; @@ -135,17 +130,17 @@ namespace llvm { /// SmallVector<DIEValue*, 12> Values; - // Private data for print() - mutable unsigned IndentCount; public: explicit DIE(unsigned Tag) - : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0) {} + : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no), + Parent(0) {} virtual ~DIE(); // Accessors. DIEAbbrev &getAbbrev() { return Abbrev; } + const DIEAbbrev &getAbbrev() const { return Abbrev; } unsigned getAbbrevNumber() const { return Abbrev.getNumber(); } - unsigned getTag() const { return Abbrev.getTag(); } + dwarf::Tag getTag() const { return Abbrev.getTag(); } unsigned getOffset() const { return Offset; } unsigned getSize() const { return Size; } const std::vector<DIE *> &getChildren() const { return Children; } @@ -153,14 +148,17 @@ namespace llvm { DIE *getParent() const { return Parent; } /// Climb up the parent chain to get the compile unit DIE this DIE belongs /// to. - DIE *getCompileUnit() const; - void setTag(unsigned Tag) { Abbrev.setTag(Tag); } + const DIE *getCompileUnit() const; + /// Similar to getCompileUnit, returns null when DIE is not added to an + /// owner yet. + const DIE *getCompileUnitOrNull() const; void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } /// addValue - Add a value and attributes to a DIE. /// - void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) { + void addValue(dwarf::Attribute Attribute, dwarf::Form Form, + DIEValue *Value) { Abbrev.AddAttribute(Attribute, Form); Values.push_back(Value); } @@ -168,15 +166,16 @@ namespace llvm { /// addChild - Add a child to the DIE. /// void addChild(DIE *Child) { - if (Child->getParent()) { - assert (Child->getParent() == this && "Unexpected DIE Parent!"); - return; - } + assert(!Child->getParent()); Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); Children.push_back(Child); Child->Parent = this; } + /// findAttribute - Find a value in the DIE with the attribute given, returns NULL + /// if no such attribute exists. + DIEValue *findAttribute(uint16_t Attribute); + #ifndef NDEBUG void print(raw_ostream &O, unsigned IndentCount = 0) const; void dump(); @@ -192,6 +191,7 @@ namespace llvm { enum { isInteger, isString, + isExpr, isLabel, isDelta, isEntry, @@ -210,15 +210,15 @@ namespace llvm { /// EmitValue - Emit value via the Dwarf writer. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0; /// SizeOf - Return the size of a value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0; #ifndef NDEBUG - virtual void print(raw_ostream &O) = 0; - void dump(); + virtual void print(raw_ostream &O) const = 0; + void dump() const; #endif }; @@ -232,7 +232,7 @@ namespace llvm { /// BestForm - Choose the best form for integer. /// - static unsigned BestForm(bool IsSigned, uint64_t Int) { + static dwarf::Form BestForm(bool IsSigned, uint64_t Int) { if (IsSigned) { const int64_t SignedInt = Int; if ((char)Int == SignedInt) return dwarf::DW_FORM_data1; @@ -248,24 +248,52 @@ namespace llvm { /// EmitValue - Emit integer of appropriate size. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; uint64_t getValue() const { return Integer; } /// SizeOf - Determine size of integer value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *I) { return I->getType() == isInteger; } #ifndef NDEBUG - virtual void print(raw_ostream &O); + virtual void print(raw_ostream &O) const; #endif }; //===--------------------------------------------------------------------===// - /// DIELabel - A label expression DIE. + /// DIEExpr - An expression DIE. + // + class DIEExpr : public DIEValue { + const MCExpr *Expr; + public: + explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {} + + /// EmitValue - Emit expression value. + /// + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + + /// getValue - Get MCExpr. + /// + const MCExpr *getValue() const { return Expr; } + + /// SizeOf - Determine size of expression value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isExpr; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O) const; +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIELabel - A label DIE. // class DIELabel : public DIEValue { const MCSymbol *Label; @@ -274,21 +302,21 @@ namespace llvm { /// EmitValue - Emit label value. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// getValue - Get MCSymbol. /// - const MCSymbol *getValue() const { return Label; } + const MCSymbol *getValue() const { return Label; } /// SizeOf - Determine size of label value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *L) { return L->getType() == isLabel; } #ifndef NDEBUG - virtual void print(raw_ostream &O); + virtual void print(raw_ostream &O) const; #endif }; @@ -304,46 +332,82 @@ namespace llvm { /// EmitValue - Emit delta value. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// SizeOf - Determine size of delta value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *D) { return D->getType() == isDelta; } #ifndef NDEBUG - virtual void print(raw_ostream &O); + virtual void print(raw_ostream &O) const; #endif }; //===--------------------------------------------------------------------===// + /// DIEString - A container for string values. + /// + class DIEString : public DIEValue { + const DIEValue *Access; + const StringRef Str; + + public: + DIEString(const DIEValue *Acc, const StringRef S) + : DIEValue(isString), Access(Acc), Str(S) {} + + /// getString - Grab the string out of the object. + StringRef getString() const { return Str; } + + /// EmitValue - Emit delta value. + /// + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + + /// SizeOf - Determine size of delta value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *D) { return D->getType() == isString; } + + #ifndef NDEBUG + virtual void print(raw_ostream &O) const; + #endif + }; + + //===--------------------------------------------------------------------===// /// DIEEntry - A pointer to another debug information entry. An instance of /// this class can also be used as a proxy for a debug information entry not /// yet defined (ie. types.) class DIEEntry : public DIEValue { DIE *const Entry; public: - explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {} + explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) { + assert(E && "Cannot construct a DIEEntry with a null DIE"); + } DIE *getEntry() const { return Entry; } /// EmitValue - Emit debug information entry offset. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// SizeOf - Determine size of debug information entry in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const { - return sizeof(int32_t); + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const { + return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) + : sizeof(int32_t); } + /// Returns size of a ref_addr entry. + static unsigned getRefAddrSize(AsmPrinter *AP); + // Implement isa/cast/dyncast. static bool classof(const DIEValue *E) { return E->getType() == isEntry; } #ifndef NDEBUG - virtual void print(raw_ostream &O); + virtual void print(raw_ostream &O) const; #endif }; @@ -353,9 +417,7 @@ namespace llvm { class DIEBlock : public DIEValue, public DIE { unsigned Size; // Size in bytes excluding size header. public: - DIEBlock() - : DIEValue(isBlock), DIE(0), Size(0) {} - virtual ~DIEBlock() {} + DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {} /// ComputeSize - calculate the size of the block. /// @@ -363,7 +425,7 @@ namespace llvm { /// BestForm - Choose the best form for data. /// - unsigned BestForm() const { + dwarf::Form BestForm() const { if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1; if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2; if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4; @@ -372,17 +434,17 @@ namespace llvm { /// EmitValue - Emit block data. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// SizeOf - Determine size of block data in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *E) { return E->getType() == isBlock; } #ifndef NDEBUG - virtual void print(raw_ostream &O); + virtual void print(raw_ostream &O) const; #endif }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp new file mode 100644 index 0000000..95eca90 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -0,0 +1,507 @@ +//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for DWARF4 hashing of DIEs. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfdebug" + +#include "DIEHash.h" + +#include "DIE.h" +#include "DwarfCompileUnit.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +/// \brief Grabs the string in whichever attribute is passed in and returns +/// a reference to it. +static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) { + const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); + const DIEAbbrev &Abbrevs = Die.getAbbrev(); + + // Iterate through all the attributes until we find the one we're + // looking for, if we can't find it return an empty string. + for (size_t i = 0; i < Values.size(); ++i) { + if (Abbrevs.getData()[i].getAttribute() == Attr) { + DIEValue *V = Values[i]; + assert(isa<DIEString>(V) && "String requested. Not a string."); + DIEString *S = cast<DIEString>(V); + return S->getString(); + } + } + return StringRef(""); +} + +/// \brief Adds the string in \p Str to the hash. This also hashes +/// a trailing NULL with the string. +void DIEHash::addString(StringRef Str) { + DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); + Hash.update(Str); + Hash.update(makeArrayRef((uint8_t)'\0')); +} + +// FIXME: The LEB128 routines are copied and only slightly modified out of +// LEB128.h. + +/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128. +void DIEHash::addULEB128(uint64_t Value) { + DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + if (Value != 0) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + Hash.update(Byte); + } while (Value != 0); +} + +void DIEHash::addSLEB128(int64_t Value) { + DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); + bool More; + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || + ((Value == -1) && ((Byte & 0x40) != 0)))); + if (More) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + Hash.update(Byte); + } while (More); +} + +/// \brief Including \p Parent adds the context of Parent to the hash.. +void DIEHash::addParentContext(const DIE &Parent) { + + DEBUG(dbgs() << "Adding parent context to hash...\n"); + + // [7.27.2] For each surrounding type or namespace beginning with the + // outermost such construct... + SmallVector<const DIE *, 1> Parents; + const DIE *Cur = &Parent; + while (Cur->getTag() != dwarf::DW_TAG_compile_unit) { + Parents.push_back(Cur); + Cur = Cur->getParent(); + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(), + E = Parents.rend(); + I != E; ++I) { + const DIE &Die = **I; + + // ... Append the letter "C" to the sequence... + addULEB128('C'); + + // ... Followed by the DWARF tag of the construct... + addULEB128(Die.getTag()); + + // ... Then the name, taken from the DW_AT_name attribute. + StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); + DEBUG(dbgs() << "... adding context: " << Name << "\n"); + if (!Name.empty()) + addString(Name); + } +} + +// Collect all of the attributes for a particular DIE in single structure. +void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { + const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); + const DIEAbbrev &Abbrevs = Die.getAbbrev(); + +#define COLLECT_ATTR(NAME) \ + case dwarf::NAME: \ + Attrs.NAME.Val = Values[i]; \ + Attrs.NAME.Desc = &Abbrevs.getData()[i]; \ + break + + for (size_t i = 0, e = Values.size(); i != e; ++i) { + DEBUG(dbgs() << "Attribute: " + << dwarf::AttributeString(Abbrevs.getData()[i].getAttribute()) + << " added.\n"); + switch (Abbrevs.getData()[i].getAttribute()) { + COLLECT_ATTR(DW_AT_name); + COLLECT_ATTR(DW_AT_accessibility); + COLLECT_ATTR(DW_AT_address_class); + COLLECT_ATTR(DW_AT_allocated); + COLLECT_ATTR(DW_AT_artificial); + COLLECT_ATTR(DW_AT_associated); + COLLECT_ATTR(DW_AT_binary_scale); + COLLECT_ATTR(DW_AT_bit_offset); + COLLECT_ATTR(DW_AT_bit_size); + COLLECT_ATTR(DW_AT_bit_stride); + COLLECT_ATTR(DW_AT_byte_size); + COLLECT_ATTR(DW_AT_byte_stride); + COLLECT_ATTR(DW_AT_const_expr); + COLLECT_ATTR(DW_AT_const_value); + COLLECT_ATTR(DW_AT_containing_type); + COLLECT_ATTR(DW_AT_count); + COLLECT_ATTR(DW_AT_data_bit_offset); + COLLECT_ATTR(DW_AT_data_location); + COLLECT_ATTR(DW_AT_data_member_location); + COLLECT_ATTR(DW_AT_decimal_scale); + COLLECT_ATTR(DW_AT_decimal_sign); + COLLECT_ATTR(DW_AT_default_value); + COLLECT_ATTR(DW_AT_digit_count); + COLLECT_ATTR(DW_AT_discr); + COLLECT_ATTR(DW_AT_discr_list); + COLLECT_ATTR(DW_AT_discr_value); + COLLECT_ATTR(DW_AT_encoding); + COLLECT_ATTR(DW_AT_enum_class); + COLLECT_ATTR(DW_AT_endianity); + COLLECT_ATTR(DW_AT_explicit); + COLLECT_ATTR(DW_AT_is_optional); + COLLECT_ATTR(DW_AT_location); + COLLECT_ATTR(DW_AT_lower_bound); + COLLECT_ATTR(DW_AT_mutable); + COLLECT_ATTR(DW_AT_ordering); + COLLECT_ATTR(DW_AT_picture_string); + COLLECT_ATTR(DW_AT_prototyped); + COLLECT_ATTR(DW_AT_small); + COLLECT_ATTR(DW_AT_segment); + COLLECT_ATTR(DW_AT_string_length); + COLLECT_ATTR(DW_AT_threads_scaled); + COLLECT_ATTR(DW_AT_upper_bound); + COLLECT_ATTR(DW_AT_use_location); + COLLECT_ATTR(DW_AT_use_UTF8); + COLLECT_ATTR(DW_AT_variable_parameter); + COLLECT_ATTR(DW_AT_virtuality); + COLLECT_ATTR(DW_AT_visibility); + COLLECT_ATTR(DW_AT_vtable_elem_location); + COLLECT_ATTR(DW_AT_type); + default: + break; + } + } +} + +void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute, + const DIE &Entry, StringRef Name) { + // append the letter 'N' + addULEB128('N'); + + // the DWARF attribute code (DW_AT_type or DW_AT_friend), + addULEB128(Attribute); + + // the context of the tag, + if (const DIE *Parent = Entry.getParent()) + addParentContext(*Parent); + + // the letter 'E', + addULEB128('E'); + + // and the name of the type. + addString(Name); + + // Currently DW_TAG_friends are not used by Clang, but if they do become so, + // here's the relevant spec text to implement: + // + // For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram, + // the context is omitted and the name to be used is the ABI-specific name + // of the subprogram (e.g., the mangled linker name). +} + +void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute, + unsigned DieNumber) { + // a) If T is in the list of [previously hashed types], use the letter + // 'R' as the marker + addULEB128('R'); + + addULEB128(Attribute); + + // and use the unsigned LEB128 encoding of [the index of T in the + // list] as the attribute value; + addULEB128(DieNumber); +} + +void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, + const DIE &Entry) { + assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend " + "tags. Add support here when there's " + "a use case"); + // Step 5 + // If the tag in Step 3 is one of [the below tags] + if ((Tag == dwarf::DW_TAG_pointer_type || + Tag == dwarf::DW_TAG_reference_type || + Tag == dwarf::DW_TAG_rvalue_reference_type || + Tag == dwarf::DW_TAG_ptr_to_member_type) && + // and the referenced type (via the [below attributes]) + // FIXME: This seems overly restrictive, and causes hash mismatches + // there's a decl/def difference in the containing type of a + // ptr_to_member_type, but it's what DWARF says, for some reason. + Attribute == dwarf::DW_AT_type) { + // ... has a DW_AT_name attribute, + StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name); + if (!Name.empty()) { + hashShallowTypeReference(Attribute, Entry, Name); + return; + } + } + + unsigned &DieNumber = Numbering[&Entry]; + if (DieNumber) { + hashRepeatedTypeReference(Attribute, DieNumber); + return; + } + + // otherwise, b) use the letter 'T' as a the marker, ... + addULEB128('T'); + + addULEB128(Attribute); + + // ... process the type T recursively by performing Steps 2 through 7, and + // use the result as the attribute value. + DieNumber = Numbering.size(); + computeHash(Entry); +} + +// Hash an individual attribute \param Attr based on the type of attribute and +// the form. +void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { + const DIEValue *Value = Attr.Val; + const DIEAbbrevData *Desc = Attr.Desc; + dwarf::Attribute Attribute = Desc->getAttribute(); + + // 7.27 Step 3 + // ... An attribute that refers to another type entry T is processed as + // follows: + if (const DIEEntry *EntryAttr = dyn_cast<DIEEntry>(Value)) { + hashDIEEntry(Attribute, Tag, *EntryAttr->getEntry()); + return; + } + + // Other attribute values use the letter 'A' as the marker, ... + addULEB128('A'); + + addULEB128(Attribute); + + // ... and the value consists of the form code (encoded as an unsigned LEB128 + // value) followed by the encoding of the value according to the form code. To + // ensure reproducibility of the signature, the set of forms used in the + // signature computation is limited to the following: DW_FORM_sdata, + // DW_FORM_flag, DW_FORM_string, and DW_FORM_block. + switch (Desc->getForm()) { + case dwarf::DW_FORM_string: + llvm_unreachable( + "Add support for DW_FORM_string if we ever start emitting them again"); + case dwarf::DW_FORM_GNU_str_index: + case dwarf::DW_FORM_strp: + addULEB128(dwarf::DW_FORM_string); + addString(cast<DIEString>(Value)->getString()); + break; + case dwarf::DW_FORM_data1: + case dwarf::DW_FORM_data2: + case dwarf::DW_FORM_data4: + case dwarf::DW_FORM_data8: + case dwarf::DW_FORM_udata: + addULEB128(dwarf::DW_FORM_sdata); + addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue()); + break; + default: + llvm_unreachable("Add support for additional forms"); + } +} + +// Go through the attributes from \param Attrs in the order specified in 7.27.4 +// and hash them. +void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) { +#define ADD_ATTR(ATTR) \ + { \ + if (ATTR.Val != 0) \ + hashAttribute(ATTR, Tag); \ + } + + ADD_ATTR(Attrs.DW_AT_name); + ADD_ATTR(Attrs.DW_AT_accessibility); + ADD_ATTR(Attrs.DW_AT_address_class); + ADD_ATTR(Attrs.DW_AT_allocated); + ADD_ATTR(Attrs.DW_AT_artificial); + ADD_ATTR(Attrs.DW_AT_associated); + ADD_ATTR(Attrs.DW_AT_binary_scale); + ADD_ATTR(Attrs.DW_AT_bit_offset); + ADD_ATTR(Attrs.DW_AT_bit_size); + ADD_ATTR(Attrs.DW_AT_bit_stride); + ADD_ATTR(Attrs.DW_AT_byte_size); + ADD_ATTR(Attrs.DW_AT_byte_stride); + ADD_ATTR(Attrs.DW_AT_const_expr); + ADD_ATTR(Attrs.DW_AT_const_value); + ADD_ATTR(Attrs.DW_AT_containing_type); + ADD_ATTR(Attrs.DW_AT_count); + ADD_ATTR(Attrs.DW_AT_data_bit_offset); + ADD_ATTR(Attrs.DW_AT_data_location); + ADD_ATTR(Attrs.DW_AT_data_member_location); + ADD_ATTR(Attrs.DW_AT_decimal_scale); + ADD_ATTR(Attrs.DW_AT_decimal_sign); + ADD_ATTR(Attrs.DW_AT_default_value); + ADD_ATTR(Attrs.DW_AT_digit_count); + ADD_ATTR(Attrs.DW_AT_discr); + ADD_ATTR(Attrs.DW_AT_discr_list); + ADD_ATTR(Attrs.DW_AT_discr_value); + ADD_ATTR(Attrs.DW_AT_encoding); + ADD_ATTR(Attrs.DW_AT_enum_class); + ADD_ATTR(Attrs.DW_AT_endianity); + ADD_ATTR(Attrs.DW_AT_explicit); + ADD_ATTR(Attrs.DW_AT_is_optional); + ADD_ATTR(Attrs.DW_AT_location); + ADD_ATTR(Attrs.DW_AT_lower_bound); + ADD_ATTR(Attrs.DW_AT_mutable); + ADD_ATTR(Attrs.DW_AT_ordering); + ADD_ATTR(Attrs.DW_AT_picture_string); + ADD_ATTR(Attrs.DW_AT_prototyped); + ADD_ATTR(Attrs.DW_AT_small); + ADD_ATTR(Attrs.DW_AT_segment); + ADD_ATTR(Attrs.DW_AT_string_length); + ADD_ATTR(Attrs.DW_AT_threads_scaled); + ADD_ATTR(Attrs.DW_AT_upper_bound); + ADD_ATTR(Attrs.DW_AT_use_location); + ADD_ATTR(Attrs.DW_AT_use_UTF8); + ADD_ATTR(Attrs.DW_AT_variable_parameter); + ADD_ATTR(Attrs.DW_AT_virtuality); + ADD_ATTR(Attrs.DW_AT_visibility); + ADD_ATTR(Attrs.DW_AT_vtable_elem_location); + ADD_ATTR(Attrs.DW_AT_type); + + // FIXME: Add the extended attributes. +} + +// Add all of the attributes for \param Die to the hash. +void DIEHash::addAttributes(const DIE &Die) { + DIEAttrs Attrs = {}; + collectAttributes(Die, Attrs); + hashAttributes(Attrs, Die.getTag()); +} + +void DIEHash::hashNestedType(const DIE &Die, StringRef Name) { + // 7.27 Step 7 + // ... append the letter 'S', + addULEB128('S'); + + // the tag of C, + addULEB128(Die.getTag()); + + // and the name. + addString(Name); +} + +// Compute the hash of a DIE. This is based on the type signature computation +// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a +// flattened description of the DIE. +void DIEHash::computeHash(const DIE &Die) { + // Append the letter 'D', followed by the DWARF tag of the DIE. + addULEB128('D'); + addULEB128(Die.getTag()); + + // Add each of the attributes of the DIE. + addAttributes(Die); + + // Then hash each of the children of the DIE. + for (std::vector<DIE *>::const_iterator I = Die.getChildren().begin(), + E = Die.getChildren().end(); + I != E; ++I) { + // 7.27 Step 7 + // If C is a nested type entry or a member function entry, ... + if (isType((*I)->getTag()) || (*I)->getTag() == dwarf::DW_TAG_subprogram) { + StringRef Name = getDIEStringAttr(**I, dwarf::DW_AT_name); + // ... and has a DW_AT_name attribute + if (!Name.empty()) { + hashNestedType(**I, Name); + continue; + } + } + computeHash(**I); + } + + // Following the last (or if there are no children), append a zero byte. + Hash.update(makeArrayRef((uint8_t)'\0')); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE +/// with the exception that we are hashing only the context and the name of the +/// type. +uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) { + + // Add the contexts to the hash. We won't be computing the ODR hash for + // function local types so it's safe to use the generic context hashing + // algorithm here. + // FIXME: If we figure out how to account for linkage in some way we could + // actually do this with a slight modification to the parent hash algorithm. + if (const DIE *Parent = Die.getParent()) + addParentContext(*Parent); + + // Add the current DIE information. + + // Add the DWARF tag of the DIE. + addULEB128(Die.getTag()); + + // Add the name of the type to the hash. + addString(getDIEStringAttr(Die, dwarf::DW_AT_name)); + + // Now get the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + return *reinterpret_cast<support::ulittle64_t *>(Result + 8); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE +/// with the inclusion of the full CU and all top level CU entities. +// TODO: Initialize the type chain at 0 instead of 1 for CU signatures. +uint64_t DIEHash::computeCUSignature(const DIE &Die) { + Numbering.clear(); + Numbering[&Die] = 1; + + // Hash the DIE. + computeHash(Die); + + // Now return the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + return *reinterpret_cast<support::ulittle64_t *>(Result + 8); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE +/// with the inclusion of additional forms not specifically called out in the +/// standard. +uint64_t DIEHash::computeTypeSignature(const DIE &Die) { + Numbering.clear(); + Numbering[&Die] = 1; + + if (const DIE *Parent = Die.getParent()) + addParentContext(*Parent); + + // Hash the DIE. + computeHash(Die); + + // Now return the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + return *reinterpret_cast<support::ulittle64_t *>(Result + 8); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h new file mode 100644 index 0000000..f0c4ef9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -0,0 +1,147 @@ +//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for DWARF4 hashing of DIEs. +// +//===----------------------------------------------------------------------===// + +#include "DIE.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/MD5.h" + +namespace llvm { + +class CompileUnit; + +/// \brief An object containing the capability of hashing and adding hash +/// attributes onto a DIE. +class DIEHash { + // The entry for a particular attribute. + struct AttrEntry { + const DIEValue *Val; + const DIEAbbrevData *Desc; + }; + + // Collection of all attributes used in hashing a particular DIE. + struct DIEAttrs { + AttrEntry DW_AT_name; + AttrEntry DW_AT_accessibility; + AttrEntry DW_AT_address_class; + AttrEntry DW_AT_allocated; + AttrEntry DW_AT_artificial; + AttrEntry DW_AT_associated; + AttrEntry DW_AT_binary_scale; + AttrEntry DW_AT_bit_offset; + AttrEntry DW_AT_bit_size; + AttrEntry DW_AT_bit_stride; + AttrEntry DW_AT_byte_size; + AttrEntry DW_AT_byte_stride; + AttrEntry DW_AT_const_expr; + AttrEntry DW_AT_const_value; + AttrEntry DW_AT_containing_type; + AttrEntry DW_AT_count; + AttrEntry DW_AT_data_bit_offset; + AttrEntry DW_AT_data_location; + AttrEntry DW_AT_data_member_location; + AttrEntry DW_AT_decimal_scale; + AttrEntry DW_AT_decimal_sign; + AttrEntry DW_AT_default_value; + AttrEntry DW_AT_digit_count; + AttrEntry DW_AT_discr; + AttrEntry DW_AT_discr_list; + AttrEntry DW_AT_discr_value; + AttrEntry DW_AT_encoding; + AttrEntry DW_AT_enum_class; + AttrEntry DW_AT_endianity; + AttrEntry DW_AT_explicit; + AttrEntry DW_AT_is_optional; + AttrEntry DW_AT_location; + AttrEntry DW_AT_lower_bound; + AttrEntry DW_AT_mutable; + AttrEntry DW_AT_ordering; + AttrEntry DW_AT_picture_string; + AttrEntry DW_AT_prototyped; + AttrEntry DW_AT_small; + AttrEntry DW_AT_segment; + AttrEntry DW_AT_string_length; + AttrEntry DW_AT_threads_scaled; + AttrEntry DW_AT_upper_bound; + AttrEntry DW_AT_use_location; + AttrEntry DW_AT_use_UTF8; + AttrEntry DW_AT_variable_parameter; + AttrEntry DW_AT_virtuality; + AttrEntry DW_AT_visibility; + AttrEntry DW_AT_vtable_elem_location; + AttrEntry DW_AT_type; + + // Insert any additional ones here... + }; + +public: + /// \brief Computes the ODR signature. + uint64_t computeDIEODRSignature(const DIE &Die); + + /// \brief Computes the CU signature. + uint64_t computeCUSignature(const DIE &Die); + + /// \brief Computes the type signature. + uint64_t computeTypeSignature(const DIE &Die); + + // Helper routines to process parts of a DIE. +private: + /// \brief Adds the parent context of \param Die to the hash. + void addParentContext(const DIE &Die); + + /// \brief Adds the attributes of \param Die to the hash. + void addAttributes(const DIE &Die); + + /// \brief Computes the full DWARF4 7.27 hash of the DIE. + void computeHash(const DIE &Die); + + // Routines that add DIEValues to the hash. +private: + /// \brief Encodes and adds \param Value to the hash as a ULEB128. + void addULEB128(uint64_t Value); + + /// \brief Encodes and adds \param Value to the hash as a SLEB128. + void addSLEB128(int64_t Value); + + /// \brief Adds \param Str to the hash and includes a NULL byte. + void addString(StringRef Str); + + /// \brief Collects the attributes of DIE \param Die into the \param Attrs + /// structure. + void collectAttributes(const DIE &Die, DIEAttrs &Attrs); + + /// \brief Hashes the attributes in \param Attrs in order. + void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag); + + /// \brief Hashes an individual attribute. + void hashAttribute(AttrEntry Attr, dwarf::Tag Tag); + + /// \brief Hashes an attribute that refers to another DIE. + void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, + const DIE &Entry); + + /// \brief Hashes a reference to a named type in such a way that is + /// independent of whether that type is described by a declaration or a + /// definition. + void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry, + StringRef Name); + + /// \brief Hashes a reference to a previously referenced type DIE. + void hashRepeatedTypeReference(dwarf::Attribute Attribute, unsigned DieNumber); + + void hashNestedType(const DIE &Die, StringRef Name); + +private: + MD5 Hash; + DenseMap<const DIE *, unsigned> Numbering; +}; +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index f58ec9b..689aeda 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -24,27 +24,14 @@ using namespace llvm; -const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) { - switch (AT) { - case eAtomTypeNULL: return "eAtomTypeNULL"; - case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset"; - case eAtomTypeCUOffset: return "eAtomTypeCUOffset"; - case eAtomTypeTag: return "eAtomTypeTag"; - case eAtomTypeNameFlags: return "eAtomTypeNameFlags"; - case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags"; - } - llvm_unreachable("invalid AtomType!"); -} - // The length of the header data is always going to be 4 + 4 + 4*NumAtoms. -DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) : - Header(8 + (atomList.size() * 4)), - HeaderData(atomList), - Entries(Allocator) { } +DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) + : Header(8 + (atomList.size() * 4)), HeaderData(atomList), + Entries(Allocator) {} -DwarfAccelTable::~DwarfAccelTable() { } +DwarfAccelTable::~DwarfAccelTable() {} -void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) { +void DwarfAccelTable::AddName(StringRef Name, DIE *die, char Flags) { assert(Data.empty() && "Already finalized!"); // If the string is in the list already then add this die to the list // otherwise add a new one. @@ -59,13 +46,16 @@ void DwarfAccelTable::ComputeBucketCount(void) { uniques[i] = Data[i]->HashValue; array_pod_sort(uniques.begin(), uniques.end()); std::vector<uint32_t>::iterator p = - std::unique(uniques.begin(), uniques.end()); + std::unique(uniques.begin(), uniques.end()); uint32_t num = std::distance(uniques.begin(), p); // Then compute the bucket size, minimum of 1 bucket. - if (num > 1024) Header.bucket_count = num/4; - if (num > 16) Header.bucket_count = num/2; - else Header.bucket_count = num > 0 ? num : 1; + if (num > 1024) + Header.bucket_count = num / 4; + if (num > 16) + Header.bucket_count = num / 2; + else + Header.bucket_count = num > 0 ? num : 1; Header.hashes_count = num; } @@ -76,15 +66,15 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A, return A->Die->getOffset() < B->Die->getOffset(); } -void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { +void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { // Create the individual hash data outputs. - for (StringMap<DataArray>::iterator - EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end(); + EI != EE; ++EI) { // Unique the entries. std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs); EI->second.erase(std::unique(EI->second.begin(), EI->second.end()), - EI->second.end()); + EI->second.end()); HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second); Data.push_back(Entry); @@ -126,7 +116,7 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { Asm->EmitInt32(HeaderData.Atoms.size()); for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { Atom A = HeaderData.Atoms[i]; - Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type)); + Asm->OutStreamer.AddComment(dwarf::AtomTypeString(A.type)); Asm->EmitInt16(A.type); Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form)); Asm->EmitInt16(A.form); @@ -152,7 +142,8 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) { + HE = Buckets[i].end(); + HI != HE; ++HI) { Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); Asm->EmitInt32((*HI)->HashValue); } @@ -166,13 +157,13 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) { + HE = Buckets[i].end(); + HI != HE; ++HI) { Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); MCContext &Context = Asm->OutStreamer.getContext(); - const MCExpr *Sub = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), - MCSymbolRefExpr::Create(SecBegin, Context), - Context); + const MCExpr *Sub = MCBinaryExpr::CreateSub( + MCSymbolRefExpr::Create((*HI)->Sym, Context), + MCSymbolRefExpr::Create(SecBegin, Context), Context); Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t)); } } @@ -185,7 +176,8 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) { + HE = Buckets[i].end(); + HI != HE; ++HI) { // Remember to emit the label for our offset. Asm->OutStreamer.EmitLabel((*HI)->Sym); Asm->OutStreamer.AddComment((*HI)->Str); @@ -193,8 +185,9 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { D->getStringPoolSym()); Asm->OutStreamer.AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.size()); - for (ArrayRef<HashDataContents*>::const_iterator - DI = (*HI)->Data.begin(), DE = (*HI)->Data.end(); + for (ArrayRef<HashDataContents *>::const_iterator + DI = (*HI)->Data.begin(), + DE = (*HI)->Data.end(); DI != DE; ++DI) { // Emit the DIE offset Asm->EmitInt32((*DI)->Die->getOffset()); @@ -214,8 +207,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { } // Emit the entire data structure to the output file. -void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, - DwarfUnits *D) { +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfUnits *D) { // Emit the header. EmitHeader(Asm); @@ -239,11 +231,12 @@ void DwarfAccelTable::print(raw_ostream &O) { HeaderData.print(O); O << "Entries: \n"; - for (StringMap<DataArray>::const_iterator - EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + for (StringMap<DataArray>::const_iterator EI = Entries.begin(), + EE = Entries.end(); + EI != EE; ++EI) { O << "Name: " << EI->getKeyData() << "\n"; for (DataArray::const_iterator DI = EI->second.begin(), - DE = EI->second.end(); + DE = EI->second.end(); DI != DE; ++DI) (*DI)->print(O); } @@ -251,14 +244,14 @@ void DwarfAccelTable::print(raw_ostream &O) { O << "Buckets and Hashes: \n"; for (size_t i = 0, e = Buckets.size(); i < e; ++i) for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) + HE = Buckets[i].end(); + HI != HE; ++HI) (*HI)->print(O); O << "Data: \n"; - for (std::vector<HashData*>::const_iterator - DI = Data.begin(), DE = Data.end(); DI != DE; ++DI) - (*DI)->print(O); - - + for (std::vector<HashData *>::const_iterator DI = Data.begin(), + DE = Data.end(); + DI != DE; ++DI) + (*DI)->print(O); } #endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 9915bca..7627313 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -67,11 +67,7 @@ class DwarfUnits; class DwarfAccelTable { - enum HashFunctionType { - eHashFunctionDJB = 0u - }; - - static uint32_t HashDJB (StringRef Str) { + static uint32_t HashDJB(StringRef Str) { uint32_t h = 5381; for (unsigned i = 0, e = Str.size(); i != e; ++i) h = ((h << 5) + h) + Str[i]; @@ -80,25 +76,25 @@ class DwarfAccelTable { // Helper function to compute the number of buckets needed based on // the number of unique hashes. - void ComputeBucketCount (void); + void ComputeBucketCount(void); struct TableHeader { - uint32_t magic; // 'HASH' magic value to allow endian detection - uint16_t version; // Version number. - uint16_t hash_function; // The hash function enumeration that was used. - uint32_t bucket_count; // The number of buckets in this hash table. - uint32_t hashes_count; // The total number of unique hash values - // and hash data offsets in this table. - uint32_t header_data_len; // The bytes to skip to get to the hash - // indexes (buckets) for correct alignment. + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number. + uint16_t hash_function; // The hash function enumeration that was used. + uint32_t bucket_count; // The number of buckets in this hash table. + uint32_t hashes_count; // The total number of unique hash values + // and hash data offsets in this table. + uint32_t header_data_len; // The bytes to skip to get to the hash + // indexes (buckets) for correct alignment. // Also written to disk is the implementation specific header data. static const uint32_t MagicHash = 0x48415348; - TableHeader (uint32_t data_len) : - magic (MagicHash), version (1), hash_function (eHashFunctionDJB), - bucket_count (0), hashes_count (0), header_data_len (data_len) - {} + TableHeader(uint32_t data_len) + : magic(MagicHash), version(1), + hash_function(dwarf::DW_hash_function_djb), bucket_count(0), + hashes_count(0), header_data_len(data_len) {} #ifndef NDEBUG void print(raw_ostream &O) { @@ -124,62 +120,38 @@ public: // uint32_t die_offset_base // uint32_t atom_count // atom_count Atoms - enum AtomType { - eAtomTypeNULL = 0u, - eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding - eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that - // contains the item in question - eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as - // DW_FORM_data1 (if no tags exceed 255) or - // DW_FORM_data2. - eAtomTypeNameFlags = 4u, // Flags from enum NameFlags - eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags - }; - - enum TypeFlags { - eTypeFlagClassMask = 0x0000000fu, - - // Always set for C++, only set for ObjC if this is the - // @implementation for a class. - eTypeFlagClassIsImplementation = ( 1u << 1 ) - }; // Make these public so that they can be used as a general interface to // the class. struct Atom { - AtomType type; // enum AtomType + uint16_t type; // enum AtomType uint16_t form; // DWARF DW_FORM_ defines - Atom(AtomType type, uint16_t form) : type(type), form(form) {} - static const char * AtomTypeString(enum AtomType); + Atom(uint16_t type, uint16_t form) : type(type), form(form) {} #ifndef NDEBUG void print(raw_ostream &O) { - O << "Type: " << AtomTypeString(type) << "\n" + O << "Type: " << dwarf::AtomTypeString(type) << "\n" << "Form: " << dwarf::FormEncodingString(form) << "\n"; } - void dump() { - print(dbgs()); - } + void dump() { print(dbgs()); } #endif }; - private: +private: struct TableHeaderData { uint32_t die_offset_base; SmallVector<Atom, 1> Atoms; TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0) - : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { } + : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {} #ifndef NDEBUG - void print (raw_ostream &O) { + void print(raw_ostream &O) { O << "die_offset_base: " << die_offset_base << "\n"; for (size_t i = 0; i < Atoms.size(); i++) Atoms[i].print(O); } - void dump() { - print(dbgs()); - } + void dump() { print(dbgs()); } #endif }; @@ -193,37 +165,38 @@ public: // HashData[hash_data_count] public: struct HashDataContents { - DIE *Die; // Offsets + DIE *Die; // Offsets char Flags; // Specific flags to output - HashDataContents(DIE *D, char Flags) : - Die(D), - Flags(Flags) { } - #ifndef NDEBUG + HashDataContents(DIE *D, char Flags) : Die(D), Flags(Flags) {} +#ifndef NDEBUG void print(raw_ostream &O) const { O << " Offset: " << Die->getOffset() << "\n"; O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n"; O << " Flags: " << Flags << "\n"; } - #endif +#endif }; + private: struct HashData { StringRef Str; uint32_t HashValue; MCSymbol *Sym; - ArrayRef<HashDataContents*> Data; // offsets - HashData(StringRef S, ArrayRef<HashDataContents*> Data) - : Str(S), Data(Data) { + ArrayRef<HashDataContents *> Data; // offsets + HashData(StringRef S, ArrayRef<HashDataContents *> Data) + : Str(S), Data(Data) { HashValue = DwarfAccelTable::HashDJB(S); } - #ifndef NDEBUG +#ifndef NDEBUG void print(raw_ostream &O) { O << "Name: " << Str << "\n"; O << " Hash Value: " << format("0x%x", HashValue) << "\n"; - O << " Symbol: " ; - if (Sym) Sym->print(O); - else O << "<none>"; + O << " Symbol: "; + if (Sym) + Sym->print(O); + else + O << "<none>"; O << "\n"; for (size_t i = 0; i < Data.size(); i++) { O << " Offset: " << Data[i]->Die->getOffset() << "\n"; @@ -231,14 +204,12 @@ private: O << " Flags: " << Data[i]->Flags << "\n"; } } - void dump() { - print(dbgs()); - } - #endif + void dump() { print(dbgs()); } +#endif }; - DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; - void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; + DwarfAccelTable(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; + void operator=(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; // Internal Functions void EmitHeader(AsmPrinter *); @@ -253,31 +224,30 @@ private: // Output Variables TableHeader Header; TableHeaderData HeaderData; - std::vector<HashData*> Data; + std::vector<HashData *> Data; // String Data - typedef std::vector<HashDataContents*> DataArray; - typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries; + typedef std::vector<HashDataContents *> DataArray; + typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries; StringEntries Entries; // Buckets/Hashes/Offsets - typedef std::vector<HashData*> HashList; + typedef std::vector<HashData *> HashList; typedef std::vector<HashList> BucketList; BucketList Buckets; HashList Hashes; // Public Implementation - public: +public: DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); ~DwarfAccelTable(); - void AddName(StringRef, DIE*, char = 0); - void FinalizeTable(AsmPrinter *, const char *); + void AddName(StringRef, DIE *, char = 0); + void FinalizeTable(AsmPrinter *, StringRef); void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *); #ifndef NDEBUG void print(raw_ostream &O); void dump() { print(dbgs()); } #endif }; - } #endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index fec5ced..8918f3d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -68,7 +68,7 @@ void DwarfCFIException::EndModule() { for (size_t i = 0, e = Personalities.size(); i != e; ++i) { if (!Personalities[i]) continue; - MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]); + MCSymbol *Sym = Asm->getSymbol(Personalities[i]); TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); AtLeastOne = true; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 89abcff..97ef687 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -22,21 +22,23 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; /// CompileUnit - Compile unit constructor. -CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, - DwarfDebug *DW, DwarfUnits *DWU) - : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU), - IndexTyDie(0), DebugInfoOffset(0) { +CompileUnit::CompileUnit(unsigned UID, DIE *D, DICompileUnit Node, + AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU) + : UniqueID(UID), Node(Node), CUDie(D), Asm(A), DD(DW), DU(DWU), + IndexTyDie(0), DebugInfoOffset(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); + insertDIE(Node, D); } /// ~CompileUnit - Destructor for compile unit. @@ -55,7 +57,7 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { /// getDefaultLowerBound - Return the default lower bound for an array. If the /// DWARF version doesn't handle the language, return -1. int64_t CompileUnit::getDefaultLowerBound() const { - switch (Language) { + switch (getLanguage()) { default: break; @@ -96,32 +98,71 @@ int64_t CompileUnit::getDefaultLowerBound() const { return -1; } +/// Check whether the DIE for this MDNode can be shared across CUs. +static bool isShareableAcrossCUs(DIDescriptor D) { + // When the MDNode can be part of the type system, the DIE can be + // shared across CUs. + return D.isType() || + (D.isSubprogram() && !DISubprogram(D).isDefinition()); +} + +/// getDIE - Returns the debug information entry map slot for the +/// specified debug variable. We delegate the request to DwarfDebug +/// when the DIE for this MDNode can be shared across CUs. The mappings +/// will be kept in DwarfDebug for shareable DIEs. +DIE *CompileUnit::getDIE(DIDescriptor D) const { + if (isShareableAcrossCUs(D)) + return DD->getDIE(D); + return MDNodeToDieMap.lookup(D); +} + +/// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug +/// when the DIE for this MDNode can be shared across CUs. The mappings +/// will be kept in DwarfDebug for shareable DIEs. +void CompileUnit::insertDIE(DIDescriptor Desc, DIE *D) { + if (isShareableAcrossCUs(Desc)) { + DD->insertDIE(Desc, D); + return; + } + MDNodeToDieMap.insert(std::make_pair(Desc, D)); +} + /// addFlag - Add a flag that is true. -void CompileUnit::addFlag(DIE *Die, unsigned Attribute) { - if (!DD->useDarwinGDBCompat()) - Die->addValue(Attribute, dwarf::DW_FORM_flag_present, - DIEIntegerOne); +void CompileUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) { + if (DD->getDwarfVersion() >= 4) + Die->addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne); else - addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1); + Die->addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne); } /// addUInt - Add an unsigned integer attribute data and value. /// -void CompileUnit::addUInt(DIE *Die, unsigned Attribute, - unsigned Form, uint64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = Integer == 1 ? - DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); +void CompileUnit::addUInt(DIE *Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, uint64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(false, Integer); + DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator) + DIEInteger(Integer); + Die->addValue(Attribute, *Form, Value); +} + +void CompileUnit::addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer) { + addUInt(Block, (dwarf::Attribute)0, Form, Integer); } /// addSInt - Add an signed integer attribute data and value. /// -void CompileUnit::addSInt(DIE *Die, unsigned Attribute, - unsigned Form, int64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(true, Integer); +void CompileUnit::addSInt(DIE *Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, int64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(true, Integer); DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); + Die->addValue(Attribute, *Form, Value); +} + +void CompileUnit::addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, + int64_t Integer) { + addSInt(Die, (dwarf::Attribute)0, Form, Integer); } /// addString - Add a string attribute data and value. We always emit a @@ -129,27 +170,31 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, /// more predictable sizes. In the case of split dwarf we emit an index /// into another table which gets us the static offset into the string /// table. -void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { +void CompileUnit::addString(DIE *Die, dwarf::Attribute Attribute, + StringRef String) { + DIEValue *Value; + dwarf::Form Form; if (!DD->useSplitDwarf()) { MCSymbol *Symb = DU->getStringPoolEntry(String); - DIEValue *Value; if (Asm->needsRelocationsForDwarfStringPool()) Value = new (DIEValueAllocator) DIELabel(Symb); else { MCSymbol *StringPool = DU->getStringPoolSym(); Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); } - Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); + Form = dwarf::DW_FORM_strp; } else { unsigned idx = DU->getStringPoolIndex(String); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Value); + Value = new (DIEValueAllocator) DIEInteger(idx); + Form = dwarf::DW_FORM_GNU_str_index; } + DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); + Die->addValue(Attribute, Form, Str); } /// addLocalString - Add a string attribute data and value. This is guaranteed /// to be in the local string pool instead of indirected. -void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, +void CompileUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute, StringRef String) { MCSymbol *Symb = DU->getStringPoolEntry(String); DIEValue *Value; @@ -162,19 +207,54 @@ void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); } +/// addExpr - Add a Dwarf expression attribute data and value. +/// +void CompileUnit::addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr) { + DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); + Die->addValue((dwarf::Attribute)0, Form, Value); +} + /// addLabel - Add a Dwarf label attribute data and value. /// -void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label) { +void CompileUnit::addLabel(DIE *Die, dwarf::Attribute Attribute, + dwarf::Form Form, const MCSymbol *Label) { DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); Die->addValue(Attribute, Form, Value); } +void CompileUnit::addLabel(DIEBlock *Die, dwarf::Form Form, + const MCSymbol *Label) { + addLabel(Die, (dwarf::Attribute)0, Form, Label); +} + +/// addSectionLabel - Add a Dwarf section label attribute data and value. +/// +void CompileUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { + if (DD->getDwarfVersion() >= 4) + addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label); + else + addLabel(Die, Attribute, dwarf::DW_FORM_data4, Label); +} + +/// addSectionOffset - Add an offset into a section attribute data and value. +/// +void CompileUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute, + uint64_t Integer) { + if (DD->getDwarfVersion() >= 4) + addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer); + else + addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); +} + /// addLabelAddress - Add a dwarf label attribute data and value using /// DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, +void CompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label) { + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + if (!DD->useSplitDwarf()) { if (Label != NULL) { DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); @@ -193,37 +273,62 @@ void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) { - +void CompileUnit::addOpAddress(DIEBlock *Die, const MCSymbol *Sym) { + DD->addArangeLabel(SymbolCU(this, Sym)); if (!DD->useSplitDwarf()) { - addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Die, 0, dwarf::DW_FORM_udata, Sym); + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Die, dwarf::DW_FORM_udata, Sym); } else { - unsigned idx = DU->getAddrPoolIndex(Sym); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); - Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value); + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); + addUInt(Die, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym)); } } -/// addDelta - Add a label delta attribute data and value. +/// addSectionDelta - Add a section label delta attribute data and value. /// -void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo) { +void CompileUnit::addSectionDelta(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die->addValue(Attribute, Form, Value); + if (DD->getDwarfVersion() >= 4) + Die->addValue(Attribute, dwarf::DW_FORM_sec_offset, Value); + else + Die->addValue(Attribute, dwarf::DW_FORM_data4, Value); } /// addDIEEntry - Add a DIE attribute data and value. /// -void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, +void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry) { - Die->addValue(Attribute, Form, createDIEEntry(Entry)); + addDIEEntry(Die, Attribute, createDIEEntry(Entry)); +} + +void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, + DIEEntry *Entry) { + const DIE *DieCU = Die->getCompileUnitOrNull(); + const DIE *EntryCU = Entry->getEntry()->getCompileUnitOrNull(); + if (!DieCU) + // We assume that Die belongs to this CU, if it is not linked to any CU yet. + DieCU = getCUDie(); + if (!EntryCU) + EntryCU = getCUDie(); + Die->addValue(Attribute, EntryCU == DieCU ? dwarf::DW_FORM_ref4 + : dwarf::DW_FORM_ref_addr, + Entry); +} + +/// Create a DIE with the given Tag, add the DIE to its parent, and +/// call insertDIE if MD is not null. +DIE *CompileUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) { + DIE *Die = new DIE(Tag); + Parent.addChild(Die); + if (N) + insertDIE(N, Die); + return Die; } /// addBlock - Add block data. /// -void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, +void CompileUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block) { Block->ComputeSize(Asm); DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. @@ -234,42 +339,42 @@ void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, /// entry. void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { // Verify variable. - if (!V.Verify()) + if (!V.isVariable()) return; unsigned Line = V.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(), - V.getContext().getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { // Verify global variable. - if (!G.Verify()) + if (!G.isGlobalVariable()) return; unsigned Line = G.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { // Verify subprogram. - if (!SP.Verify()) + if (!SP.isSubprogram()) return; // If the line number is 0, don't add it. @@ -277,35 +382,35 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), - SP.getDirectory(), getUniqueID()); + unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), SP.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { // Verify type. - if (!Ty.Verify()) + if (!Ty.isType()) return; unsigned Line = Ty.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), - Ty.getDirectory(), getUniqueID()); + unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), Ty.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { // Verify type. - if (!Ty.Verify()) + if (!Ty.isObjCProperty()) return; unsigned Line = Ty.getLineNumber(); @@ -315,8 +420,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { unsigned FileID = DD->getOrCreateSourceID(File.getFilename(), File.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information @@ -331,68 +436,73 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { return; StringRef FN = NS.getFilename(); - unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(FN, NS.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. -void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, +void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, MachineLocation Location) { - if (DV->variableHasComplexAddress()) + if (DV.variableHasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV->isBlockByrefVariable()) + else if (DV.isBlockByrefVariable()) addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); else - addAddress(Die, dwarf::DW_AT_location, Location); + addAddress(Die, dwarf::DW_AT_location, Location, + DV.getVariable().isIndirect()); } /// addRegisterOp - Add register operand. -void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) { +void CompileUnit::addRegisterOp(DIEBlock *TheDie, unsigned Reg) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); if (DWReg < 32) - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); else { - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); } } /// addRegisterOffset - Add register offset. -void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg, +void CompileUnit::addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); if (Reg == TRI->getFrameRegister(*Asm->MF)) // If variable offset is based in frame register then use fbreg. - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); else if (DWReg < 32) - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); else { - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); } - addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset); + addSInt(TheDie, dwarf::DW_FORM_sdata, Offset); } /// addAddress - Add an address attribute to a die based on the location /// provided. -void CompileUnit::addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location) { +void CompileUnit::addAddress(DIE *Die, dwarf::Attribute Attribute, + const MachineLocation &Location, bool Indirect) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - if (Location.isReg()) + if (Location.isReg() && !Indirect) addRegisterOp(Block, Location.getReg()); - else + else { addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + if (Indirect && !Location.isReg()) { + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } + } // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /// addComplexAddress - Start with the address based on the location provided, @@ -400,37 +510,37 @@ void CompileUnit::addAddress(DIE *Die, unsigned Attribute, /// given the extra address information encoded in the DIVariable, starting from /// the starting location. Add the DWARF information to the die. /// -void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, +void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, + dwarf::Attribute Attribute, const MachineLocation &Location) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned N = DV->getNumAddrElements(); + unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (Location.isReg()) { - if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) { + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1)); + addRegisterOffset(Block, Location.getReg(), DV.getAddrElement(1)); i = 2; } else addRegisterOp(Block, Location.getReg()); - } - else + } else addRegisterOffset(Block, Location.getReg(), Location.getOffset()); - for (;i < N; ++i) { - uint64_t Element = DV->getAddrElement(i); + for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); if (Element == DIBuilder::OpPlus) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { if (!Location.isReg()) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else llvm_unreachable("unknown DIBuilder Opcode"); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else + llvm_unreachable("unknown DIBuilder Opcode"); } // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -493,45 +603,42 @@ void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, /// starting location. Add the DWARF information to the die. For /// more information, read large comment just above here. /// -void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, +void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, + dwarf::Attribute Attribute, const MachineLocation &Location) { - DIType Ty = DV->getType(); + DIType Ty = DV.getType(); DIType TmpTy = Ty; - unsigned Tag = Ty.getTag(); + uint16_t Tag = Ty.getTag(); bool isPointer = false; - StringRef varName = DV->getName(); + StringRef varName = DV.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - TmpTy = DTy.getTypeDerivedFrom(); + DIDerivedType DTy(Ty); + TmpTy = resolve(DTy.getTypeDerivedFrom()); isPointer = true; } - DICompositeType blockStruct = DICompositeType(TmpTy); + DICompositeType blockStruct(TmpTy); // Find the __forwarding field and the variable field in the __Block_byref // struct. DIArray Fields = blockStruct.getTypeArray(); - DIDescriptor varField = DIDescriptor(); - DIDescriptor forwardingField = DIDescriptor(); + DIDerivedType varField; + DIDerivedType forwardingField; for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDescriptor Element = Fields.getElement(i); - DIDerivedType DT = DIDerivedType(Element); + DIDerivedType DT(Fields.getElement(i)); StringRef fieldName = DT.getName(); if (fieldName == "__forwarding") - forwardingField = Element; + forwardingField = DT; else if (fieldName == varName) - varField = Element; + varField = DT; } // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = - DIDerivedType(forwardingField).getOffsetInBits() >> 3; - unsigned varFieldOffset = - DIDerivedType(varField).getOffsetInBits() >> 3; + unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3; + unsigned varFieldOffset = varField.getOffsetInBits() >> 2; // Decode the original location, and use that as the start of the byref // variable's location. @@ -545,76 +652,139 @@ void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). if (isPointer) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Next add the offset for the '__forwarding' field: // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, forwardingFieldOffset); } // Now dereference the __forwarding field to get to the real __Block_byref // struct: DW_OP_deref. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Now that we've got the real __Block_byref... struct, add the offset // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, varFieldOffset); } // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /// isTypeSigned - Return true if the type is signed. -static bool isTypeSigned(DIType Ty, int *SizeInBits) { +static bool isTypeSigned(DwarfDebug *DD, DIType Ty, int *SizeInBits) { if (Ty.isDerivedType()) - return isTypeSigned(DIDerivedType(Ty).getTypeDerivedFrom(), SizeInBits); + return isTypeSigned(DD, DD->resolve(DIDerivedType(Ty).getTypeDerivedFrom()), + SizeInBits); if (Ty.isBasicType()) - if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed - || DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { + if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed || + DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { *SizeInBits = Ty.getSizeInBits(); return true; } return false; } +/// Return true if type encoding is unsigned. +static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { + DIDerivedType DTy(Ty); + if (DTy.isDerivedType()) + return isUnsignedDIType(DD, DD->resolve(DTy.getTypeDerivedFrom())); + + DIBasicType BTy(Ty); + if (BTy.isBasicType()) { + unsigned Encoding = BTy.getEncoding(); + if (Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_boolean) + return true; + } + return false; +} + +/// If this type is derived from a base type then return base type size. +static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { + unsigned Tag = Ty.getTag(); + + if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && + Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type) + return Ty.getSizeInBits(); + + DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); + + // If this type is not derived from any type then take conservative approach. + if (!BaseType.isValid()) + return Ty.getSizeInBits(); + + // If this is a derived type, go ahead and get the base type, unless it's a + // reference then it's just the size of the field. Pointer types have no need + // of this since they're a different type of qualification on the type. + if (BaseType.getTag() == dwarf::DW_TAG_reference_type || + BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty.getSizeInBits(); + + if (BaseType.isDerivedType()) + return getBaseTypeSize(DD, DIDerivedType(BaseType)); + + return BaseType.getSizeInBits(); +} + /// addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, +void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty) { + // FIXME: This is a bit conservative/simple - it emits negative values at + // their maximum bit width which is a bit unfortunate (& doesn't prefer + // udata/sdata over dataN as suggested by the DWARF spec) assert(MO.isImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); int SizeInBits = -1; - bool SignedConstant = isTypeSigned(Ty, &SizeInBits); - unsigned Form = SignedConstant ? dwarf::DW_FORM_sdata : dwarf::DW_FORM_udata; - switch (SizeInBits) { - case 8: Form = dwarf::DW_FORM_data1; break; - case 16: Form = dwarf::DW_FORM_data2; break; - case 32: Form = dwarf::DW_FORM_data4; break; - case 64: Form = dwarf::DW_FORM_data8; break; - default: break; + bool SignedConstant = isTypeSigned(DD, Ty, &SizeInBits); + dwarf::Form Form; + + // If we're a signed constant definitely use sdata. + if (SignedConstant) { + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, MO.getImm()); + return; } - SignedConstant ? addSInt(Block, 0, Form, MO.getImm()) - : addUInt(Block, 0, Form, MO.getImm()); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; + // Else use data for now unless it's larger than we can deal with. + switch (SizeInBits) { + case 8: + Form = dwarf::DW_FORM_data1; + break; + case 16: + Form = dwarf::DW_FORM_data2; + break; + case 32: + Form = dwarf::DW_FORM_data4; + break; + case 64: + Form = dwarf::DW_FORM_data8; + break; + default: + Form = dwarf::DW_FORM_udata; + addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); + return; + } + addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); } /// addConstantFPValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isFPImm() && "Invalid machine operand!"); +void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { + assert(MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); // Get the raw data form of the floating point. const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); + const char *FltPtr = (const char *)FltVal.getRawData(); int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. bool LittleEndian = Asm->getDataLayout().isLittleEndian(); @@ -624,43 +794,56 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { // Output the constant to DWARF one byte at a time. for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); + addUInt(Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; + addBlock(Die, dwarf::DW_AT_const_value, Block); } /// addConstantFPValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { - return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false); +void CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { + // Pass this down to addConstantValue as an unsigned bag of bits. + addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); } /// addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, +void CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned) { - return addConstantValue(Die, CI->getValue(), Unsigned); + addConstantValue(Die, CI->getValue(), Unsigned); } // addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, - bool Unsigned) { +void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { - unsigned form = 0; + // If we're a signed constant definitely use sdata. + if (!Unsigned) { + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + Val.getSExtValue()); + return; + } + + // Else use data for now unless it's larger than we can deal with. + dwarf::Form Form; switch (CIBitWidth) { - case 8: form = dwarf::DW_FORM_data1; break; - case 16: form = dwarf::DW_FORM_data2; break; - case 32: form = dwarf::DW_FORM_data4; break; - case 64: form = dwarf::DW_FORM_data8; break; + case 8: + Form = dwarf::DW_FORM_data1; + break; + case 16: + Form = dwarf::DW_FORM_data2; + break; + case 32: + Form = dwarf::DW_FORM_data4; + break; + case 64: + Form = dwarf::DW_FORM_data8; + break; default: - form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata; + addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + Val.getZExtValue()); + return; } - if (Unsigned) - addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue()); - else - addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue()); - return true; + addUInt(Die, dwarf::DW_AT_const_value, Form, Val.getZExtValue()); + return; } DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); @@ -678,11 +861,10 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, c = Ptr64[i / 8] >> (8 * (i & 7)); else c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)); - addUInt(Block, 0, dwarf::DW_FORM_data1, c); + addUInt(Block, dwarf::DW_FORM_data1, c); } - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; + addBlock(Die, dwarf::DW_AT_const_value, Block); } /// addTemplateParams - Add template parameters into buffer. @@ -691,47 +873,48 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { DIDescriptor Element = TParams.getElement(i); if (Element.isTemplateTypeParameter()) - Buffer.addChild(getOrCreateTemplateTypeParameterDIE( - DITemplateTypeParameter(Element))); + constructTemplateTypeParameterDIE(Buffer, + DITemplateTypeParameter(Element)); else if (Element.isTemplateValueParameter()) - Buffer.addChild(getOrCreateTemplateValueParameterDIE( - DITemplateValueParameter(Element))); + constructTemplateValueParameterDIE(Buffer, + DITemplateValueParameter(Element)); } } /// getOrCreateContextDIE - Get context owner's DIE. -DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) { +DIE *CompileUnit::getOrCreateContextDIE(DIScope Context) { + if (!Context || Context.isFile()) + return getCUDie(); if (Context.isType()) return getOrCreateTypeDIE(DIType(Context)); - else if (Context.isNameSpace()) + if (Context.isNameSpace()) return getOrCreateNameSpace(DINameSpace(Context)); - else if (Context.isSubprogram()) + if (Context.isSubprogram()) return getOrCreateSubprogramDIE(DISubprogram(Context)); - else - return getDIE(Context); -} - -/// addToContextOwner - Add Die into the list of its context owner's children. -void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { - if (DIE *ContextDIE = getOrCreateContextDIE(Context)) - ContextDIE->addChild(Die); - else - addDie(Die); + return getDIE(Context); } /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { - DIType Ty(TyNode); - if (!Ty.Verify()) + if (!TyNode) return NULL; + + DIType Ty(TyNode); + assert(Ty.isType()); + + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(resolve(Ty.getContext())); + assert(ContextDIE); + DIE *TyDIE = getDIE(Ty); if (TyDIE) return TyDIE; // Create new type. - TyDIE = new DIE(dwarf::DW_TAG_base_type); - insertDIE(Ty, TyDIE); + TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + if (Ty.isBasicType()) constructTypeDIE(*TyDIE, DIBasicType(Ty)); else if (Ty.isCompositeType()) @@ -748,28 +931,24 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { DICompositeType CT(Ty); // A runtime language of 0 actually means C/C++ and that any // non-negative value is some version of Objective-C/C++. - IsImplementation = (CT.getRunTimeLang() == 0) || - CT.isObjcClassComplete(); + IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete(); } - unsigned Flags = IsImplementation ? - DwarfAccelTable::eTypeFlagClassIsImplementation : 0; + unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0; addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags)); } - addToContextOwner(TyDIE, Ty.getContext()); return TyDIE; } /// addType - Add a new type attribute to the specified entity. -void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { - if (!Ty.Verify()) - return; +void CompileUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) { + assert(Ty && "Trying to add a type that doesn't exist?"); // Check for pre-existence. DIEEntry *Entry = getDIEEntry(Ty); // If it exists then use the existing value. if (Entry) { - Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); + addDIEEntry(Entity, Attribute, Entry); return; } @@ -779,35 +958,112 @@ void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { // Set up proxy. Entry = createDIEEntry(Buffer); insertDIEEntry(Ty, Entry); - Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); + addDIEEntry(Entity, Attribute, Entry); // If this is a complete composite type then include it in the // list of global types. addGlobalType(Ty); } +// Accelerator table mutators - add each name along with its companion +// DIE to the proper table while ensuring that the name that we're going +// to reference is in the string table. We do this since the names we +// add may not only be identical to the names in the DIE. +void CompileUnit::addAccelName(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelNames[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelObjC(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelObjC[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelNamespace(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelNamespace[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { + DU->getStringPoolEntry(Name); + std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name]; + DIEs.push_back(Die); +} + +/// addGlobalName - Add a new global name to the compile unit. +void CompileUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) { + std::string FullName = getParentContextString(Context) + Name.str(); + GlobalNames[FullName] = Die; +} + /// addGlobalType - Add a new global type to the compile unit. /// void CompileUnit::addGlobalType(DIType Ty) { - DIDescriptor Context = Ty.getContext(); - if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl() - && (!Context || Context.isCompileUnit() || Context.isFile() - || Context.isNameSpace())) - if (DIEEntry *Entry = getDIEEntry(Ty)) - GlobalTypes[Ty.getName()] = Entry->getEntry(); + DIScope Context = resolve(Ty.getContext()); + if (!Ty.getName().empty() && !Ty.isForwardDecl() && + (!Context || Context.isCompileUnit() || Context.isFile() || + Context.isNameSpace())) + if (DIEEntry *Entry = getDIEEntry(Ty)) { + std::string FullName = + getParentContextString(Context) + Ty.getName().str(); + GlobalTypes[FullName] = Entry->getEntry(); + } +} + +/// getParentContextString - Walks the metadata parent chain in a language +/// specific manner (using the compile unit language) and returns +/// it as a string. This is done at the metadata level because DIEs may +/// not currently have been added to the parent context and walking the +/// DIEs looking for names is more expensive than walking the metadata. +std::string CompileUnit::getParentContextString(DIScope Context) const { + if (!Context) + return ""; + + // FIXME: Decide whether to implement this for non-C++ languages. + if (getLanguage() != dwarf::DW_LANG_C_plus_plus) + return ""; + + std::string CS; + SmallVector<DIScope, 1> Parents; + while (!Context.isCompileUnit()) { + Parents.push_back(Context); + if (Context.getContext()) + Context = resolve(Context.getContext()); + else + // Structure, etc types will have a NULL context if they're at the top + // level. + break; + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (SmallVectorImpl<DIScope>::reverse_iterator I = Parents.rbegin(), + E = Parents.rend(); + I != E; ++I) { + DIScope Ctx = *I; + StringRef Name = Ctx.getName(); + if (!Name.empty()) { + CS += Name; + CS += "::"; + } + } + return CS; } -/// addPubTypes - Add type for pubtypes section. +/// addPubTypes - Add subprogram argument types for pubtypes section. void CompileUnit::addPubTypes(DISubprogram SP) { DICompositeType SPTy = SP.getType(); - unsigned SPTag = SPTy.getTag(); + uint16_t SPTag = SPTy.getTag(); if (SPTag != dwarf::DW_TAG_subroutine_type) return; DIArray Args = SPTy.getTypeArray(); for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { DIType ATy(Args.getElement(i)); - if (!ATy.Verify()) + if (!ATy.isType()) continue; addGlobalType(ATy); } @@ -821,18 +1077,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, Name); - if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) { - Buffer.setTag(dwarf::DW_TAG_unspecified_type); - // Unspecified types has only name, nothing else. + // An unspecified type only has a name attribute. + if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) return; - } - Buffer.setTag(dwarf::DW_TAG_base_type); addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, BTy.getEncoding()); uint64_t Size = BTy.getSizeInBits() >> 3; - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); } /// constructTypeDIE - Construct derived type die from DIDerivedType. @@ -840,16 +1093,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Get core information. StringRef Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; - unsigned Tag = DTy.getTag(); - - // FIXME - Workaround for templates. - if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; - - Buffer.setTag(Tag); + uint16_t Tag = Buffer.getTag(); // Map to main type, void will not have a type. - DIType FromTy = DTy.getTypeDerivedFrom(); - addType(&Buffer, FromTy); + DIType FromTy = resolve(DTy.getTypeDerivedFrom()); + if (FromTy) + addType(&Buffer, FromTy); // Add name if not anonymous or intermediate type. if (!Name.empty()) @@ -857,97 +1106,102 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Add size if non-zero (derived types might be zero-sized.) if (Size && Tag != dwarf::DW_TAG_pointer_type) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DTy.getClassType())); + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, + getOrCreateTypeDIE(resolve(DTy.getClassType()))); // Add source line info if available and TyDesc is not a forward declaration. if (!DTy.isForwardDecl()) addSourceLine(&Buffer, DTy); } +/// Return true if the type is appropriately scoped to be contained inside +/// its own type unit. +static bool isTypeUnitScoped(DIType Ty, const DwarfDebug *DD) { + DIScope Parent = DD->resolve(Ty.getContext()); + while (Parent) { + // Don't generate a hash for anything scoped inside a function. + if (Parent.isSubprogram()) + return false; + Parent = DD->resolve(Parent.getContext()); + } + return true; +} + +/// Return true if the type should be split out into a type unit. +static bool shouldCreateTypeUnit(DICompositeType CTy, const DwarfDebug *DD) { + uint16_t Tag = CTy.getTag(); + + switch (Tag) { + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + case dwarf::DW_TAG_class_type: + // If this is a class, structure, union, or enumeration type + // that is a definition (not a declaration), and not scoped + // inside a function then separate this out as a type unit. + return !CTy.isForwardDecl() && isTypeUnitScoped(CTy, DD); + default: + return false; + } +} + /// constructTypeDIE - Construct type DIE from DICompositeType. void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Get core information. StringRef Name = CTy.getName(); uint64_t Size = CTy.getSizeInBits() >> 3; - unsigned Tag = CTy.getTag(); - Buffer.setTag(Tag); + uint16_t Tag = Buffer.getTag(); switch (Tag) { case dwarf::DW_TAG_array_type: - constructArrayTypeDIE(Buffer, &CTy); + constructArrayTypeDIE(Buffer, CTy); break; - case dwarf::DW_TAG_enumeration_type: { - DIArray Elements = CTy.getTypeArray(); - - // Add enumerators to enumeration type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIE *ElemDie = NULL; - DIDescriptor Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { - ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); - Buffer.addChild(ElemDie); - } - } - DIType DTy = CTy.getTypeDerivedFrom(); - if (DTy.Verify()) { - addType(&Buffer, DTy); - addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1); - } - } + case dwarf::DW_TAG_enumeration_type: + constructEnumTypeDIE(Buffer, CTy); break; case dwarf::DW_TAG_subroutine_type: { - // Add return type. + // Add return type. A void return won't have a type. DIArray Elements = CTy.getTypeArray(); - DIDescriptor RTy = Elements.getElement(0); - addType(&Buffer, DIType(RTy)); + DIType RTy(Elements.getElement(0)); + if (RTy) + addType(&Buffer, RTy); bool isPrototyped = true; // Add arguments. for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Ty = Elements.getElement(i); if (Ty.isUnspecifiedParameter()) { - DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); - Buffer.addChild(Arg); + createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); isPrototyped = false; } else { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); addType(Arg, DIType(Ty)); if (DIType(Ty).isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); - Buffer.addChild(Arg); } } // Add prototype flag if we're dealing with a C language and the // function has been prototyped. + uint16_t Language = getLanguage(); if (isPrototyped && - (Language == dwarf::DW_LANG_C89 || - Language == dwarf::DW_LANG_C99 || + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(&Buffer, dwarf::DW_AT_prototyped); - } - break; + } break; case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: { // Add elements to structure type. DIArray Elements = CTy.getTypeArray(); - - // A forward struct declared type may not have elements available. - unsigned N = Elements.getNumElements(); - if (N == 0) - break; - - // Add elements to structure type. - for (unsigned i = 0; i < N; ++i) { + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); DIE *ElemDie = NULL; if (Element.isSubprogram()) { DISubprogram SP(Element); - ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); + ElemDie = getOrCreateSubprogramDIE(SP); if (SP.isProtected()) addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); @@ -956,21 +1210,23 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { dwarf::DW_ACCESS_private); else addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_public); + dwarf::DW_ACCESS_public); if (SP.isExplicit()) addFlag(ElemDie, dwarf::DW_AT_explicit); } else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); if (DDTy.getTag() == dwarf::DW_TAG_friend) { - ElemDie = new DIE(dwarf::DW_TAG_friend); - addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); - } else if (DDTy.isStaticMember()) - ElemDie = createStaticMemberDIE(DDTy); - else - ElemDie = createMemberDIE(DDTy); + ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); + addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()), + dwarf::DW_AT_friend); + } else if (DDTy.isStaticMember()) { + getOrCreateStaticMemberDIE(DDTy); + } else { + constructMemberDIE(Buffer, DDTy); + } } else if (Element.isObjCProperty()) { DIObjCProperty Property(Element); - ElemDie = new DIE(Property.getTag()); + ElemDie = createAndAddDIE(Property.getTag(), Buffer); StringRef PropertyName = Property.getObjCPropertyName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); addType(ElemDie, Property.getType()); @@ -995,8 +1251,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (Property.isNonAtomicObjCProperty()) PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; if (PropertyAttributes) - addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0, - PropertyAttributes); + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, + PropertyAttributes); DIEEntry *Entry = getDIEEntry(Element); if (!Entry) { @@ -1005,20 +1261,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } } else continue; - Buffer.addChild(ElemDie); } if (CTy.isAppleBlockExtension()) addFlag(&Buffer, dwarf::DW_AT_APPLE_block); - DICompositeType ContainingType = CTy.getContainingType(); - if (DIDescriptor(ContainingType).isCompositeType()) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DIType(ContainingType))); - else { - DIDescriptor Context = CTy.getContext(); - addToContextOwner(&Buffer, Context); - } + DICompositeType ContainingType(resolve(CTy.getContainingType())); + if (ContainingType) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, + getOrCreateTypeDIE(ContainingType)); if (CTy.isObjcClassComplete()) addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type); @@ -1026,8 +1277,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. if (Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || - Tag == dwarf::DW_TAG_union_type) + Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) addTemplateParams(Buffer, CTy.getTemplateParams()); break; @@ -1041,16 +1291,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addString(&Buffer, dwarf::DW_AT_name, Name); if (Tag == dwarf::DW_TAG_enumeration_type || - Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) // TODO: Do we care about size for enum forward declarations? if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); else if (!CTy.isForwardDecl()) // Add zero size if it is not a forward declaration. - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, 0); // If we're a forward decl, say so. if (CTy.isForwardDecl()) @@ -1063,117 +1312,128 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // No harm in adding the runtime language to the declaration. unsigned RLang = CTy.getRunTimeLang(); if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, + RLang); } + // If this is a type applicable to a type unit it then add it to the + // list of types we'll compute a hash for later. + if (shouldCreateTypeUnit(CTy, DD)) + DD->addTypeUnitType(&Buffer); } -/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateTypeParameter. -DIE * -CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { - DIE *ParamDIE = getDIE(TP); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); - addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); - return ParamDIE; +/// constructTemplateTypeParameterDIE - Construct new DIE for the given +/// DITemplateTypeParameter. +void +CompileUnit::constructTemplateTypeParameterDIE(DIE &Buffer, + DITemplateTypeParameter TP) { + DIE *ParamDIE = + createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); + // Add the type if it exists, it could be void and therefore no type. + if (TP.getType()) + addType(ParamDIE, resolve(TP.getType())); + if (!TP.getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); } -/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateValueParameter. -DIE * -CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){ - DIE *ParamDIE = getDIE(TPV); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); - addType(ParamDIE, TPV.getType()); - if (!TPV.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, TPV.getName()); - addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - TPV.getValue()); - return ParamDIE; +/// constructTemplateValueParameterDIE - Construct new DIE for the given +/// DITemplateValueParameter. +void +CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer, + DITemplateValueParameter VP) { + DIE *ParamDIE = createAndAddDIE(VP.getTag(), Buffer); + + // Add the type if there is one, template template and template parameter + // packs will not have a type. + if (VP.getTag() == dwarf::DW_TAG_template_value_parameter) + addType(ParamDIE, resolve(VP.getType())); + if (!VP.getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, VP.getName()); + if (Value *Val = VP.getValue()) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) + addConstantValue(ParamDIE, CI, + isUnsignedDIType(DD, resolve(VP.getType()))); + else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) { + // For declaration non-type template parameters (such as global values and + // functions) + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + addOpAddress(Block, Asm->getSymbol(GV)); + // Emit DW_OP_stack_value to use the address as the immediate value of the + // parameter, rather than a pointer to it. + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); + addBlock(ParamDIE, dwarf::DW_AT_location, Block); + } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) { + assert(isa<MDString>(Val)); + addString(ParamDIE, dwarf::DW_AT_GNU_template_name, + cast<MDString>(Val)->getString()); + } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + assert(isa<MDNode>(Val)); + DIArray A(cast<MDNode>(Val)); + addTemplateParams(*ParamDIE, A); + } + } } /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(NS.getContext()); + DIE *NDie = getDIE(NS); if (NDie) return NDie; - NDie = new DIE(dwarf::DW_TAG_namespace); - insertDIE(NS, NDie); + NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); + if (!NS.getName().empty()) { addString(NDie, dwarf::DW_AT_name, NS.getName()); addAccelNamespace(NS.getName(), NDie); + addGlobalName(NS.getName(), NDie, NS.getContext()); } else addAccelNamespace("(anonymous namespace)", NDie); addSourceLine(NDie, NS); - addToContextOwner(NDie, NS.getContext()); return NDie; } -/// getRealLinkageName - If special LLVM prefix that is used to inform the asm -/// printer to not emit usual symbol prefix before the symbol name is used then -/// return linkage name after skipping this special LLVM prefix. -static StringRef getRealLinkageName(StringRef LinkageName) { - char One = '\1'; - if (LinkageName.startswith(StringRef(&One, 1))) - return LinkageName.substr(1); - return LinkageName; -} - /// getOrCreateSubprogramDIE - Create new DIE using SP. DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE (as is the case for member function + // declarations). + DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + DIE *SPDie = getDIE(SP); if (SPDie) return SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (SPDecl.isSubprogram()) + // Add subprogram definitions to the CU die directly. + ContextDIE = CUDie.get(); // DW_TAG_inlined_subroutine may refer to this DIE. - insertDIE(SP, SPDie); + SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); - DISubprogram SPDecl = SP.getFunctionDeclaration(); DIE *DeclDie = NULL; - if (SPDecl.isSubprogram()) { + if (SPDecl.isSubprogram()) DeclDie = getOrCreateSubprogramDIE(SPDecl); - } - - // Add to context owner. - addToContextOwner(SPDie, SP.getContext()); // Add function template parameters. addTemplateParams(*SPDie, SP.getTemplateParams()); - // Unfortunately this code needs to stay here instead of below the - // AT_specification code in order to work around a bug in older - // gdbs that requires the linkage name to resolve multiple template - // functions. - // TODO: Remove this set of code when we get rid of the old gdb - // compatibility. - StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty() && DD->useDarwinGDBCompat()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); - // If this DIE is going to refer declaration info using AT_specification // then there is no need to add other attributes. if (DeclDie) { // Refer function declaration directly. - addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - DeclDie); + addDIEEntry(SPDie, dwarf::DW_AT_specification, DeclDie); return SPDie; } // Add the linkage name if we have one. - if (!LinkageName.empty() && !DD->useDarwinGDBCompat()) + StringRef LinkageName = SP.getLinkageName(); + if (!LinkageName.empty()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); + GlobalValue::getRealLinkageName(LinkageName)); // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) @@ -1183,31 +1443,31 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add the prototype if we have a prototype and we have a C like // language. + uint16_t Language = getLanguage(); if (SP.isPrototyped() && - (Language == dwarf::DW_LANG_C89 || - Language == dwarf::DW_LANG_C99 || + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); - // Add Return Type. DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); + assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type && + "the type of a subprogram should be a subroutine"); - if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) - addType(SPDie, SPTy); - else + DIArray Args = SPTy.getTypeArray(); + // Add a return type. If this is a type like a C/C++ void type we don't add a + // return type. + if (Args.getElement(0)) addType(SPDie, DIType(Args.getElement(0))); unsigned VK = SP.getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIEBlock *Block = getDIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); - addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); - ContainingTypeMap.insert(std::make_pair(SPDie, - SP.getContainingType())); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(Block, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); + ContainingTypeMap.insert( + std::make_pair(SPDie, resolve(SP.getContainingType()))); } if (!SP.isDefinition()) { @@ -1215,19 +1475,13 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(Args.getElement(i)); - addType(Arg, ATy); - if (ATy.isArtificial()) - addFlag(Arg, dwarf::DW_AT_artificial); - SPDie->addChild(Arg); - } + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie); + DIType ATy(Args.getElement(i)); + addType(Arg, ATy); + if (ATy.isArtificial()) + addFlag(Arg, dwarf::DW_AT_artificial); + } } if (SP.isArtificial()) @@ -1274,16 +1528,16 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) { } /// createGlobalVariableDIE - create global variable DIE. -void CompileUnit::createGlobalVariableDIE(const MDNode *N) { +void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { + // Check for pre-existence. - if (getDIE(N)) + if (getDIE(GV)) return; - DIGlobalVariable GV(N); - if (!GV.Verify()) + if (!GV.isGlobalVariable()) return; - DIDescriptor GVContext = GV.getContext(); + DIScope GVContext = GV.getContext(); DIType GTy = GV.getType(); // If this is a static data member definition, some attributes belong @@ -1294,35 +1548,30 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (SDMDecl.Verify()) { assert(SDMDecl.isStaticMember() && "Expected static member decl"); // We need the declaration DIE that is in the static member's class. - // But that class might not exist in the DWARF yet. - // Creating the class will create the static member decl DIE. - getOrCreateContextDIE(SDMDecl.getContext()); - VariableDIE = getDIE(SDMDecl); - assert(VariableDIE && "Static member decl has no context?"); + VariableDIE = getOrCreateStaticMemberDIE(SDMDecl); IsStaticMember = true; } // If this is not a static data member definition, create the variable // DIE and add the initial set of attributes to it. if (!VariableDIE) { - VariableDIE = new DIE(GV.getTag()); + // Construct the context before querying for the existence of the DIE in + // case such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(GVContext); + // Add to map. - insertDIE(N, VariableDIE); + VariableDIE = createAndAddDIE(GV.getTag(), *ContextDIE, GV); // Add name and type. addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); addType(VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) { + if (!GV.isLocalToUnit()) addFlag(VariableDIE, dwarf::DW_AT_external); - addGlobalName(GV.getName(), VariableDIE); - } // Add line number info. addSourceLine(VariableDIE, GV); - // Add to context owner. - addToContextOwner(VariableDIE, GVContext); } // Add location. @@ -1332,57 +1581,73 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal())); + const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); + if (GV.getGlobal()->isThreadLocal()) { + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + const MCExpr *Expr = + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(Block, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(Block, dwarf::DW_FORM_udata, Expr); + } else { + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(Block, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr)); + } + // 3) followed by a custom OP to make the debugger do a TLS lookup. + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + } else + addOpAddress(Block, Sym); // Do not create specification DIE if context is either compile unit // or a subprogram. if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && - !GVContext.isFile() && !isSubprogramContext(GVContext)) { + !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) { // Create specification DIE. - VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); - addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, - dwarf::DW_FORM_ref4, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *CUDie); + addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, VariableDIE); + addBlock(VariableSpecDIE, dwarf::DW_AT_location, Block); // A static member's declaration is already flagged as such. if (!SDMDecl.Verify()) addFlag(VariableDIE, dwarf::DW_AT_declaration); - addDie(VariableSpecDIE); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + addBlock(VariableDIE, dwarf::DW_AT_location, Block); } - // Add linkage name. + // Add the linkage name. StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty()) { + if (!LinkageName.empty()) // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and // TAG_variable. - addString(IsStaticMember && VariableSpecDIE ? - VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); - // In compatibility mode with older gdbs we put the linkage name on both - // the TAG_variable DIE and on the TAG_member DIE. - if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat()) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); - } + addString(IsStaticMember && VariableSpecDIE ? VariableSpecDIE + : VariableDIE, + dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::getRealLinkageName(LinkageName)); } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV.getConstant())) { + dyn_cast_or_null<ConstantInt>(GV.getConstant())) { // AT_const_value was added when the static member was created. To avoid // emitting AT_const_value multiple times, we only add AT_const_value when // it is not a static member. if (!IsStaticMember) - addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); - } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + addConstantValue(VariableDIE, CI, isUnsignedDIType(DD, GTy)); + } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) { addToAccelTable = true; // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); Value *Ptr = CE->getOperand(0); - addOpAddress(Block, Asm->Mang->getSymbol(cast<GlobalValue>(Ptr))); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end()); - addUInt(Block, 0, dwarf::DW_FORM_udata, - Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + addOpAddress(Block, Asm->getSymbol(cast<GlobalValue>(Ptr))); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end()); + addUInt(Block, dwarf::DW_FORM_udata, + Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(VariableDIE, dwarf::DW_AT_location, Block); } if (addToAccelTable) { @@ -1395,14 +1660,16 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addAccelName(GV.getLinkageName(), AddrDIE); } - return; + if (!GV.isLocalToUnit()) + addGlobalName(GV.getName(), VariableSpecDIE ? VariableSpecDIE : VariableDIE, + GV.getContext()); } /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { - DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + DIE *DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); + addDIEEntry(DW_Subrange, dwarf::DW_AT_type, IndexTy); // The LowerBound value defines the lower bounds which is typically zero for // C/C++. The Count value is the number of elements. Values are 64 bit. If @@ -1415,26 +1682,22 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, int64_t Count = SR.getCount(); if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) - addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound); + addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); if (Count != -1 && Count != 0) // FIXME: An unbounded array should reference the expression that defines // the array. - addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1); - - Buffer.addChild(DW_Subrange); + addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, None, + LowerBound + Count - 1); } /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void CompileUnit::constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy) { - Buffer.setTag(dwarf::DW_TAG_array_type); - if (CTy->isVector()) +void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { + if (CTy.isVector()) addFlag(&Buffer, dwarf::DW_AT_GNU_vector); - // Emit derived type. - addType(&Buffer, CTy->getTypeDerivedFrom()); - DIArray Elements = CTy->getTypeArray(); + // Emit the element type. + addType(&Buffer, resolve(CTy.getTypeDerivedFrom())); // Get an anonymous type for index type. // FIXME: This type should be passed down from the front end @@ -1442,16 +1705,16 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DIE *IdxTy = getIndexTyDie(); if (!IdxTy) { // Construct an anonymous type for index type. - IdxTy = new DIE(dwarf::DW_TAG_base_type); + IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *CUDie.get()); addString(IdxTy, dwarf::DW_AT_name, "int"); - addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); + addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int32_t)); addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::DW_ATE_signed); - addDie(IdxTy); setIndexTyDie(IdxTy); } // Add subranges to array type. + DIArray Elements = CTy.getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.getTag() == dwarf::DW_TAG_subrange_type) @@ -1459,195 +1722,183 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, } } -/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. -DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) { - DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, Name); - int64_t Value = ETy.getEnumValue(); - addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); - return Enumerator; +/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType. +void CompileUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIEnumerator Enum(Elements.getElement(i)); + if (Enum.isEnumerator()) { + DIE *Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); + StringRef Name = Enum.getName(); + addString(Enumerator, dwarf::DW_AT_name, Name); + int64_t Value = Enum.getEnumValue(); + addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + } + } + DIType DTy = resolve(CTy.getTypeDerivedFrom()); + if (DTy) { + addType(&Buffer, DTy); + addFlag(&Buffer, dwarf::DW_AT_enum_class); + } } /// constructContainingTypeDIEs - Construct DIEs for types that contain /// vtables. void CompileUnit::constructContainingTypeDIEs() { for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), - CE = ContainingTypeMap.end(); CI != CE; ++CI) { + CE = ContainingTypeMap.end(); + CI != CE; ++CI) { DIE *SPDie = CI->first; - const MDNode *N = CI->second; - if (!N) continue; - DIE *NDie = getDIE(N); - if (!NDie) continue; - addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + DIDescriptor D(CI->second); + if (!D) + continue; + DIE *NDie = getDIE(D); + if (!NDie) + continue; + addDIEEntry(SPDie, dwarf::DW_AT_containing_type, NDie); } } /// constructVariableDIE - Construct a DIE for the given DbgVariable. -DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { - StringRef Name = DV->getName(); - - // Translate tag to proper Dwarf tag. - unsigned Tag = DV->getTag(); +DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) { + StringRef Name = DV.getName(); // Define variable debug information entry. - DIE *VariableDie = new DIE(Tag); - DbgVariable *AbsVar = DV->getAbstractVariable(); + DIE *VariableDie = new DIE(DV.getTag()); + DbgVariable *AbsVar = DV.getAbstractVariable(); DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL; if (AbsDIE) - addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsDIE); + addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, Name); - addSourceLine(VariableDie, DV->getVariable()); - addType(VariableDie, DV->getType()); + if (!Name.empty()) + addString(VariableDie, dwarf::DW_AT_name, Name); + addSourceLine(VariableDie, DV.getVariable()); + addType(VariableDie, DV.getType()); } - if (DV->isArtificial()) + if (DV.isArtificial()) addFlag(VariableDie, dwarf::DW_AT_artificial); if (isScopeAbstract) { - DV->setDIE(VariableDie); + DV.setDIE(VariableDie); return VariableDie; } // Add variable address. - unsigned Offset = DV->getDotDebugLocOffset(); + unsigned Offset = DV.getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, - dwarf::DW_FORM_data4, - Asm->GetTempSymbol("debug_loc", Offset)); - DV->setDIE(VariableDie); + addSectionLabel(VariableDie, dwarf::DW_AT_location, + Asm->GetTempSymbol("debug_loc", Offset)); + DV.setDIE(VariableDie); return VariableDie; } // Check if variable is described by a DBG_VALUE instruction. - if (const MachineInstr *DVInsn = DV->getMInsn()) { - bool updated = false; - if (DVInsn->getNumOperands() == 3) { - if (DVInsn->getOperand(0).isReg()) { - const MachineOperand RegOp = DVInsn->getOperand(0); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - if (DVInsn->getOperand(1).isImm() && - TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { - unsigned FrameReg = 0; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, - DVInsn->getOperand(1).getImm(), - FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, VariableDie, Location); - - } else if (RegOp.getReg()) - addVariableAddress(DV, VariableDie, - MachineLocation(RegOp.getReg())); - updated = true; - } - else if (DVInsn->getOperand(0).isImm()) - updated = - addConstantValue(VariableDie, DVInsn->getOperand(0), - DV->getType()); - else if (DVInsn->getOperand(0).isFPImm()) - updated = - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isCImm()) - updated = - addConstantValue(VariableDie, - DVInsn->getOperand(0).getCImm(), - DV->getType().isUnsignedDIType()); - } else { - addVariableAddress(DV, VariableDie, - Asm->getDebugValueLocation(DVInsn)); - updated = true; - } - if (!updated) { - // If variableDie is not updated then DBG_VALUE instruction does not - // have valid variable info. - delete VariableDie; - return NULL; - } - DV->setDIE(VariableDie); + if (const MachineInstr *DVInsn = DV.getMInsn()) { + assert(DVInsn->getNumOperands() == 3); + if (DVInsn->getOperand(0).isReg()) { + const MachineOperand RegOp = DVInsn->getOperand(0); + // If the second operand is an immediate, this is an indirect value. + if (DVInsn->getOperand(1).isImm()) { + MachineLocation Location(RegOp.getReg(), + DVInsn->getOperand(1).getImm()); + addVariableAddress(DV, VariableDie, Location); + } else if (RegOp.getReg()) + addVariableAddress(DV, VariableDie, MachineLocation(RegOp.getReg())); + } else if (DVInsn->getOperand(0).isImm()) + addConstantValue(VariableDie, DVInsn->getOperand(0), DV.getType()); + else if (DVInsn->getOperand(0).isFPImm()) + addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isCImm()) + addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(), + isUnsignedDIType(DD, DV.getType())); + + DV.setDIE(VariableDie); return VariableDie; } else { // .. else use frame index. - int FI = DV->getFrameIndex(); + int FI = DV.getFrameIndex(); if (FI != ~0) { unsigned FrameReg = 0; const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); MachineLocation Location(FrameReg, Offset); addVariableAddress(DV, VariableDie, Location); } } - DV->setDIE(VariableDie); + DV.setDIE(VariableDie); return VariableDie; } -/// createMemberDIE - Create new member DIE. -DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { - DIE *MemberDie = new DIE(DT.getTag()); +/// constructMemberDIE - Construct member DIE from DIDerivedType. +void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { + DIE *MemberDie = createAndAddDIE(DT.getTag(), Buffer); StringRef Name = DT.getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); - addType(MemberDie, DT.getTypeDerivedFrom()); + addType(MemberDie, resolve(DT.getTypeDerivedFrom())); addSourceLine(MemberDie, DT); - DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - - uint64_t Size = DT.getSizeInBits(); - uint64_t FieldSize = DT.getOriginalTypeSize(); - - if (Size != FieldSize) { - // Handle bitfield. - addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); - addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); - - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); - uint64_t HiMark = (Offset + FieldSize) & AlignMask; - uint64_t FieldOffset = (HiMark - FieldSize); - Offset -= FieldOffset; - - // Maybe we need to work from the other end. - if (Asm->getDataLayout().isLittleEndian()) - Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); - - // Here WD_AT_data_member_location points to the anonymous - // field that includes this bit field. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); - - } else - // This is not a bitfield. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); - - if (DT.getTag() == dwarf::DW_TAG_inheritance - && DT.isVirtual()) { + if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) { // For C++, virtual base classes are not at fixed offset. Use following // expression to extract appropriate offset from vtable. // BaseAddr = ObAddr + *((*ObAddr) - Offset) DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits()); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, - VBaseLocationDie); - } else - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + + addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); + } else { + uint64_t Size = DT.getSizeInBits(); + uint64_t FieldSize = getBaseTypeSize(DD, DT); + uint64_t OffsetInBytes; + + if (Size != FieldSize) { + // Handle bitfield. + addUInt(MemberDie, dwarf::DW_AT_byte_size, None, + getBaseTypeSize(DD, DT) >> 3); + addUInt(MemberDie, dwarf::DW_AT_bit_size, None, DT.getSizeInBits()); + + uint64_t Offset = DT.getOffsetInBits(); + uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t HiMark = (Offset + FieldSize) & AlignMask; + uint64_t FieldOffset = (HiMark - FieldSize); + Offset -= FieldOffset; + + // Maybe we need to work from the other end. + if (Asm->getDataLayout().isLittleEndian()) + Offset = FieldSize - (Offset + Size); + addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset); + + // Here WD_AT_data_member_location points to the anonymous + // field that includes this bit field. + OffsetInBytes = FieldOffset >> 3; + } else + // This is not a bitfield. + OffsetInBytes = DT.getOffsetInBits() >> 3; + + if (DD->getDwarfVersion() <= 2) { + DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); + addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); + addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); + } else + addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, + OffsetInBytes); + } if (DT.isProtected()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, @@ -1671,17 +1922,26 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { if (DT.isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); - - return MemberDie; } -/// createStaticMemberDIE - Create new DIE for C++ static member. -DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) { +/// getOrCreateStaticMemberDIE - Create new DIE for C++ static member. +DIE *CompileUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { if (!DT.Verify()) return NULL; - DIE *StaticMemberDIE = new DIE(DT.getTag()); - DIType Ty = DT.getTypeDerivedFrom(); + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext())); + assert(dwarf::isType(ContextDIE->getTag()) && + "Static member should belong to a type."); + + DIE *StaticMemberDIE = getDIE(DT); + if (StaticMemberDIE) + return StaticMemberDIE; + + StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); + + DIType Ty = resolve(DT.getTypeDerivedFrom()); addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName()); addType(StaticMemberDIE, Ty); @@ -1702,10 +1962,20 @@ DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) { dwarf::DW_ACCESS_public); if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant())) - addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType()); + addConstantValue(StaticMemberDIE, CI, isUnsignedDIType(DD, Ty)); if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant())) addConstantFPValue(StaticMemberDIE, CFP); - insertDIE(DT, StaticMemberDIE); return StaticMemberDIE; } + +void CompileUnit::emitHeader(const MCSection *ASection, + const MCSymbol *ASectionSym) { + Asm->OutStreamer.AddComment("DWARF version number"); + Asm->EmitInt16(DD->getDwarfVersion()); + Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); + Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), + ASectionSym); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 8f08c63..69a96df 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -15,15 +15,16 @@ #define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #include "DIE.h" +#include "DwarfDebug.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringMap.h" #include "llvm/DebugInfo.h" +#include "llvm/MC/MCExpr.h" namespace llvm { -class DwarfDebug; -class DwarfUnits; class MachineLocation; class MachineOperand; class ConstantInt; @@ -38,11 +39,10 @@ class CompileUnit { /// unsigned UniqueID; - /// Language - The DW_AT_language of the compile unit - /// - unsigned Language; + /// Node - MDNode for the compile unit. + DICompileUnit Node; - /// Die - Compile unit debug information entry. + /// CUDie - Compile unit debug information entry. /// const OwningPtr<DIE> CUDie; @@ -56,28 +56,28 @@ class CompileUnit { /// IndexTyDie - An anonymous type for index type. Owned by CUDie. DIE *IndexTyDie; - /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton + /// MDNodeToDieMap - Tracks the mapping of unit level debug information /// variables to debug information entries. DenseMap<const MDNode *, DIE *> MDNodeToDieMap; - /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton + /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information /// descriptors to debug information entries using a DIEEntry proxy. DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; /// GlobalNames - A map of globally visible named entities for this unit. /// - StringMap<DIE*> GlobalNames; + StringMap<DIE *> GlobalNames; /// GlobalTypes - A map of globally visible types for this unit. /// - StringMap<DIE*> GlobalTypes; + StringMap<DIE *> GlobalTypes; /// AccelNames - A map of names for the name accelerator table. /// - StringMap<std::vector<DIE*> > AccelNames; - StringMap<std::vector<DIE*> > AccelObjC; - StringMap<std::vector<DIE*> > AccelNamespace; - StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes; + StringMap<std::vector<DIE *> > AccelNames; + StringMap<std::vector<DIE *> > AccelObjC; + StringMap<std::vector<DIE *> > AccelNamespace; + StringMap<std::vector<std::pair<DIE *, unsigned> > > AccelTypes; /// DIEBlocks - A list of all the DIEBlocks in use. std::vector<DIEBlock *> DIEBlocks; @@ -87,163 +87,161 @@ class CompileUnit { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; - /// Offset of the CUDie from beginning of debug info section. - unsigned DebugInfoOffset; + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; - /// getLowerBoundDefault - Return the default lower bound for an array. If the - /// DWARF version doesn't handle the language, return -1. - int64_t getDefaultLowerBound() const; + // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently. + DIEInteger *DIEIntegerOne; public: - CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW, - DwarfUnits *); + CompileUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A, + DwarfDebug *DW, DwarfUnits *DWU); ~CompileUnit(); // Accessors. - unsigned getUniqueID() const { return UniqueID; } - unsigned getLanguage() const { return Language; } - DIE* getCUDie() const { return CUDie.get(); } - unsigned getDebugInfoOffset() const { return DebugInfoOffset; } - const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; } - const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } - - const StringMap<std::vector<DIE*> > &getAccelNames() const { + unsigned getUniqueID() const { return UniqueID; } + uint16_t getLanguage() const { return Node.getLanguage(); } + DICompileUnit getNode() const { return Node; } + DIE *getCUDie() const { return CUDie.get(); } + const StringMap<DIE *> &getGlobalNames() const { return GlobalNames; } + const StringMap<DIE *> &getGlobalTypes() const { return GlobalTypes; } + + const StringMap<std::vector<DIE *> > &getAccelNames() const { return AccelNames; } - const StringMap<std::vector<DIE*> > &getAccelObjC() const { + const StringMap<std::vector<DIE *> > &getAccelObjC() const { return AccelObjC; } - const StringMap<std::vector<DIE*> > &getAccelNamespace() const { + const StringMap<std::vector<DIE *> > &getAccelNamespace() const { return AccelNamespace; } - const StringMap<std::vector<std::pair<DIE*, unsigned > > > - &getAccelTypes() const { + const StringMap<std::vector<std::pair<DIE *, unsigned> > > & + getAccelTypes() const { return AccelTypes; } + unsigned getDebugInfoOffset() const { return DebugInfoOffset; } void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } + /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } + /// getParentContextString - Get a string containing the language specific + /// context for a global name. + std::string getParentContextString(DIScope Context) const; + /// addGlobalName - Add a new global entity to the compile unit. /// - void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; } + void addGlobalName(StringRef Name, DIE *Die, DIScope Context); /// addGlobalType - Add a new global type to the compile unit. /// void addGlobalType(DIType Ty); + /// addPubTypes - Add a set of types from the subprogram to the global types. + void addPubTypes(DISubprogram SP); /// addAccelName - Add a new name to the name accelerator table. - void addAccelName(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelNames[Name]; - DIEs.push_back(Die); - } - void addAccelObjC(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelObjC[Name]; - DIEs.push_back(Die); - } - void addAccelNamespace(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelNamespace[Name]; - DIEs.push_back(Die); - } - void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { - std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name]; - DIEs.push_back(Die); - } + void addAccelName(StringRef Name, DIE *Die); - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. - DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } + /// addAccelObjC - Add a new name to the ObjC accelerator table. + void addAccelObjC(StringRef Name, DIE *Die); - DIEBlock *getDIEBlock() { - return new (DIEValueAllocator) DIEBlock(); - } + /// addAccelNamespace - Add a new name to the namespace accelerator table. + void addAccelNamespace(StringRef Name, DIE *Die); - /// insertDIE - Insert DIE into the map. - void insertDIE(const MDNode *N, DIE *D) { - MDNodeToDieMap.insert(std::make_pair(N, D)); - } + /// addAccelType - Add a new type to the type accelerator table. + void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die); - /// getDIEEntry - Returns the debug information entry for the specified - /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap<const MDNode *, DIEEntry *>::iterator I = - MDNodeToDIEEntryMap.find(N); - if (I == MDNodeToDIEEntryMap.end()) - return NULL; - return I->second; - } + /// getDIE - Returns the debug information entry map slot for the + /// specified debug variable. We delegate the request to DwarfDebug + /// when the MDNode can be part of the type system, since DIEs for + /// the type system can be shared across CUs and the mappings are + /// kept in DwarfDebug. + DIE *getDIE(DIDescriptor D) const; - /// insertDIEEntry - Insert debug information entry into the map. - void insertDIEEntry(const MDNode *N, DIEEntry *E) { - MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); - } + DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); } + + /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug + /// when the MDNode can be part of the type system, since DIEs for + /// the type system can be shared across CUs and the mappings are + /// kept in DwarfDebug. + void insertDIE(DIDescriptor Desc, DIE *D); /// addDie - Adds or interns the DIE to the compile unit. /// - void addDie(DIE *Buffer) { - this->CUDie->addChild(Buffer); - } - - // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { - return IndexTyDie; - } - - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. - void setIndexTyDie(DIE *D) { - IndexTyDie = D; - } + void addDie(DIE *Buffer) { CUDie->addChild(Buffer); } /// addFlag - Add a flag that is true to the DIE. - void addFlag(DIE *Die, unsigned Attribute); + void addFlag(DIE *Die, dwarf::Attribute Attribute); /// addUInt - Add an unsigned integer attribute data and value. /// - void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); + void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, + uint64_t Integer); + + void addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer); /// addSInt - Add an signed integer attribute data and value. /// - void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); + void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, + int64_t Integer); + + void addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, int64_t Integer); /// addString - Add a string attribute data and value. /// - void addString(DIE *Die, unsigned Attribute, const StringRef Str); + void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); /// addLocalString - Add a string attribute data and value. /// - void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str); + void addLocalString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); + + /// addExpr - Add a Dwarf expression attribute data and value. + /// + void addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr); /// addLabel - Add a Dwarf label attribute data and value. /// - void addLabel(DIE *Die, unsigned Attribute, unsigned Form, + void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label); + void addLabel(DIEBlock *Die, dwarf::Form Form, const MCSymbol *Label); + + /// addSectionLabel - Add a Dwarf section label attribute data and value. + /// + void addSectionLabel(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Label); + + /// addSectionOffset - Add an offset into a section attribute data and value. + /// + void addSectionOffset(DIE *Die, dwarf::Attribute Attribute, uint64_t Integer); + /// addLabelAddress - Add a dwarf label attribute data and value using /// either DW_FORM_addr or DW_FORM_GNU_addr_index. /// - void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label); + void addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label); /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// - void addOpAddress(DIE *Die, MCSymbol *Label); + void addOpAddress(DIEBlock *Die, const MCSymbol *Label); - /// addDelta - Add a label delta attribute data and value. + /// addSectionDelta - Add a label delta attribute data and value. + void addSectionDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); + + /// addDIEEntry - Add a DIE attribute data and value. /// - void addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo); + void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry); /// addDIEEntry - Add a DIE attribute data and value. /// - void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); + void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry); /// addBlock - Add block data. /// - void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); + void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block); /// addSourceLine - Add location information to specified debug information /// entry. @@ -256,33 +254,33 @@ public: /// addAddress - Add an address attribute to a die based on the location /// provided. - void addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location); + void addAddress(DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location, + bool Indirect = false); /// addConstantValue - Add constant value entry in variable DIE. - bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); - bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); - bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); + void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); + void addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); + void addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, const MachineOperand &MO); - bool addConstantFPValue(DIE *Die, const ConstantFP *CFP); + void addConstantFPValue(DIE *Die, const MachineOperand &MO); + void addConstantFPValue(DIE *Die, const ConstantFP *CFP); /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); /// addRegisterOp - Add register operand. - void addRegisterOp(DIE *TheDie, unsigned Reg); + void addRegisterOp(DIEBlock *TheDie, unsigned Reg); /// addRegisterOffset - Add register offset. - void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset); + void addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset); /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable /// (navigating the extra location information encoded in the type) based on /// the starting location. Add the DWARF information to the die. /// - void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addComplexAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location); // FIXME: Should be reformulated in terms of addComplexAddress. @@ -292,20 +290,18 @@ public: /// starting location. Add the DWARF information to the die. Obsolete, /// please use addComplexAddress instead. /// - void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location); /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. - void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location); - - /// addToContextOwner - Add Die into the list of its context owner's children. - void addToContextOwner(DIE *Die, DIDescriptor Context); + void addVariableAddress(const DbgVariable &DV, DIE *Die, + MachineLocation Location); /// addType - Add a new type attribute to the specified entity. This takes /// and attribute parameter because DW_AT_friend attributes are also /// type references. - void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type); + void addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute = dwarf::DW_AT_type); /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *getOrCreateNameSpace(DINameSpace NS); @@ -317,66 +313,103 @@ public: /// given DIType. DIE *getOrCreateTypeDIE(const MDNode *N); - /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateTypeParameter. - DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); + /// getOrCreateContextDIE - Get context owner's DIE. + DIE *getOrCreateContextDIE(DIScope Context); - /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create - /// new DIE for the given DITemplateValueParameter. - DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); + /// createGlobalVariableDIE - create global variable DIE. + void createGlobalVariableDIE(DIGlobalVariable GV); - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. - DIEEntry *createDIEEntry(DIE *Entry); + /// constructContainingTypeDIEs - Construct DIEs for types that contain + /// vtables. + void constructContainingTypeDIEs(); - /// createGlobalVariableDIE - create global variable DIE. - void createGlobalVariableDIE(const MDNode *N); + /// constructVariableDIE - Construct a DIE for the given DbgVariable. + DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract); + + /// Create a DIE with the given Tag, add the DIE to its parent, and + /// call insertDIE if MD is not null. + DIE *createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N = DIDescriptor()); + + /// Compute the size of a header for this unit, not including the initial + /// length field. + unsigned getHeaderSize() const { + return sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t); // Pointer Size (in bytes) + } - void addPubTypes(DISubprogram SP); + /// Emit the header for this unit, not including the initial length field. + void emitHeader(const MCSection *ASection, const MCSymbol *ASectionSym); +private: /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(DIE &Buffer, - DIBasicType BTy); + void constructTypeDIE(DIE &Buffer, DIBasicType BTy); /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(DIE &Buffer, - DIDerivedType DTy); + void constructTypeDIE(DIE &Buffer, DIDerivedType DTy); /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(DIE &Buffer, - DICompositeType CTy); + void constructTypeDIE(DIE &Buffer, DICompositeType CTy); /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy); + void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy); /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. - DIE *constructEnumTypeDIE(DIEnumerator ETy); + void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy); - /// constructContainingTypeDIEs - Construct DIEs for types that contain - /// vtables. - void constructContainingTypeDIEs(); + /// constructMemberDIE - Construct member DIE from DIDerivedType. + void constructMemberDIE(DIE &Buffer, DIDerivedType DT); - /// constructVariableDIE - Construct a DIE for the given DbgVariable. - DIE *constructVariableDIE(DbgVariable *DV, bool isScopeAbstract); + /// constructTemplateTypeParameterDIE - Construct new DIE for the given + /// DITemplateTypeParameter. + void constructTemplateTypeParameterDIE(DIE &Buffer, + DITemplateTypeParameter TP); - /// createMemberDIE - Create new member DIE. - DIE *createMemberDIE(DIDerivedType DT); + /// constructTemplateValueParameterDIE - Construct new DIE for the given + /// DITemplateValueParameter. + void constructTemplateValueParameterDIE(DIE &Buffer, + DITemplateValueParameter TVP); - /// createStaticMemberDIE - Create new static data member DIE. - DIE *createStaticMemberDIE(DIDerivedType DT); + /// getOrCreateStaticMemberDIE - Create new static data member DIE. + DIE *getOrCreateStaticMemberDIE(DIDerivedType DT); - /// getOrCreateContextDIE - Get context owner's DIE. - DIE *getOrCreateContextDIE(DIDescriptor Context); + /// Offset of the CUDie from beginning of debug info section. + unsigned DebugInfoOffset; -private: + /// getLowerBoundDefault - Return the default lower bound for an array. If the + /// DWARF version doesn't handle the language, return -1. + int64_t getDefaultLowerBound() const; - // DIEValueAllocator - All DIEValues are allocated through this allocator. - BumpPtrAllocator DIEValueAllocator; - DIEInteger *DIEIntegerOne; + /// getDIEEntry - Returns the debug information entry for the specified + /// debug variable. + DIEEntry *getDIEEntry(const MDNode *N) const { + return MDNodeToDIEEntryMap.lookup(N); + } + + /// insertDIEEntry - Insert debug information entry into the map. + void insertDIEEntry(const MDNode *N, DIEEntry *E) { + MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); + } + + // getIndexTyDie - Get an anonymous type for index type. + DIE *getIndexTyDie() { return IndexTyDie; } + + // setIndexTyDie - Set D as anonymous type for index which can be reused + // later. + void setIndexTyDie(DIE *D) { IndexTyDie = D; } + + /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// information entry. + DIEEntry *createDIEEntry(DIE *Entry); + + /// resolve - Look in the DwarfDebug map for the MDNode that + /// corresponds to the reference. + template <typename T> T resolve(DIRef<T> Ref) const { + return DD->resolve(Ref); + } }; } // end llvm namespace diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1e706cc..d1e1ad1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DIEHash.h" #include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "llvm/ADT/STLExtras.h" @@ -34,8 +35,10 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ValueHandle.h" @@ -46,61 +49,69 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", - cl::Hidden, - cl::desc("Disable debug info printing")); +static cl::opt<bool> +DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); -static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, - cl::desc("Make an absence of debug location information explicit."), - cl::init(false)); +static cl::opt<bool> UnknownLocations( + "use-unknown-locations", cl::Hidden, + cl::desc("Make an absence of debug location information explicit."), + cl::init(false)); -static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames", - cl::Hidden, cl::init(false), - cl::desc("Generate DWARF pubnames section")); +static cl::opt<bool> +GenerateODRHash("generate-odr-hash", cl::Hidden, + cl::desc("Add an ODR hash to external type DIEs."), + cl::init(false)); -namespace { - enum DefaultOnOff { - Default, Enable, Disable - }; -} - -static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, - cl::desc("Output prototype dwarf accelerator tables."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); - -static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, - cl::desc("Compatibility with Darwin gdb."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); - -static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden, - cl::desc("Output prototype dwarf split debug info."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); +static cl::opt<bool> +GenerateCUHash("generate-cu-hash", cl::Hidden, + cl::desc("Add the CU hash as the dwo_id."), + cl::init(false)); -namespace { - const char *DWARFGroupName = "DWARF Emission"; - const char *DbgTimerName = "DWARF Debug Writer"; +static cl::opt<bool> +GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden, + cl::desc("Generate GNU-style pubnames and pubtypes"), + cl::init(false)); - struct CompareFirst { - template <typename T> bool operator()(const T &lhs, const T &rhs) const { - return lhs.first < rhs.first; - } - }; -} // end anonymous namespace +namespace { +enum DefaultOnOff { + Default, + Enable, + Disable +}; +} + +static cl::opt<DefaultOnOff> +DwarfAccelTables("dwarf-accel-tables", cl::Hidden, + cl::desc("Output prototype dwarf accelerator tables."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +SplitDwarf("split-dwarf", cl::Hidden, + cl::desc("Output prototype dwarf split debug info."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, + cl::desc("Generate DWARF pubnames and pubtypes sections"), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<unsigned> +DwarfVersionNumber("dwarf-version", cl::Hidden, + cl::desc("Generate DWARF for dwarf version."), + cl::init(0)); + +static const char *const DWARFGroupName = "DWARF Emission"; +static const char *const DbgTimerName = "DWARF Debug Writer"; //===----------------------------------------------------------------------===// @@ -110,6 +121,13 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512) namespace llvm { +/// resolve - Look in the DwarfDebug map for the MDNode that +/// corresponds to the reference. +template <typename T> +T DbgVariable::resolve(DIRef<T> Ref) const { + return DD->resolve(Ref); +} + DIType DbgVariable::getType() const { DIType Ty = Var.getType(); // FIXME: isBlockByrefVariable should be reformulated in terms of complex @@ -140,21 +158,16 @@ DIType DbgVariable::getType() const { the pointers and __Block_byref_x_VarName struct to find the actual value of the variable. The function addBlockByrefType does this. */ DIType subType = Ty; - unsigned tag = Ty.getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - subType = DTy.getTypeDerivedFrom(); - } + uint16_t tag = Ty.getTag(); - DICompositeType blockStruct = DICompositeType(subType); - DIArray Elements = blockStruct.getTypeArray(); + if (tag == dwarf::DW_TAG_pointer_type) + subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom()); + DIArray Elements = DICompositeType(subType).getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIDerivedType DT = DIDerivedType(Element); + DIDerivedType DT(Elements.getElement(i)); if (getName() == DT.getName()) - return (DT.getTypeDerivedFrom()); + return (resolve(DT.getTypeDerivedFrom())); } } return Ty; @@ -162,15 +175,23 @@ DIType DbgVariable::getType() const { } // end llvm namespace +/// Return Dwarf Version by checking module flags. +static unsigned getDwarfVersionFromModule(const Module *M) { + Value *Val = M->getModuleFlag("Dwarf Version"); + if (!Val) + return dwarf::DWARF_VERSION; + return cast<ConstantInt>(Val)->getZExtValue(); +} + DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), FirstCU(0), AbbreviationsSet(InitAbbreviationsSetSize), SourceIdMap(DIEValueAllocator), PrevLabel(NULL), GlobalCUIndexCount(0), - InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string", + InfoHolder(A, &AbbreviationsSet, Abbreviations, "info_string", DIEValueAllocator), SkeletonAbbrevSet(InitAbbreviationsSetSize), - SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string", + SkeletonHolder(A, &SkeletonAbbrevSet, SkeletonAbbrevs, "skel_string", DIEValueAllocator) { DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; @@ -180,37 +201,34 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - // Turn on accelerator tables and older gdb compatibility - // for Darwin. + // Turn on accelerator tables for Darwin by default, pubnames by + // default for non-Darwin, and handle split dwarf. bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin(); - if (DarwinGDBCompat == Default) { - if (IsDarwin) - IsDarwinGDBCompat = true; - else - IsDarwinGDBCompat = false; - } else - IsDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false; - if (DwarfAccelTables == Default) { - if (IsDarwin) - HasDwarfAccelTables = true; - else - HasDwarfAccelTables = false; - } else - HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false; + if (DwarfAccelTables == Default) + HasDwarfAccelTables = IsDarwin; + else + HasDwarfAccelTables = DwarfAccelTables == Enable; if (SplitDwarf == Default) HasSplitDwarf = false; else - HasSplitDwarf = SplitDwarf == Enable ? true : false; + HasSplitDwarf = SplitDwarf == Enable; + + if (DwarfPubSections == Default) + HasDwarfPubSections = !IsDarwin; + else + HasDwarfPubSections = DwarfPubSections == Enable; + + DwarfVersion = DwarfVersionNumber + ? DwarfVersionNumber + : getDwarfVersionFromModule(MMI->getModule()); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(); } } -DwarfDebug::~DwarfDebug() { -} // Switch to the specified MCSection and emit an assembler // temporary label to it if SymbolStem is specified. @@ -247,48 +265,37 @@ unsigned DwarfUnits::getStringPoolIndex(StringRef Str) { return Entry.second; } -unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) { - std::pair<MCSymbol*, unsigned> &Entry = AddressPool[Sym]; - if (Entry.first) return Entry.second; +unsigned DwarfUnits::getAddrPoolIndex(const MCSymbol *Sym) { + return getAddrPoolIndex(MCSymbolRefExpr::Create(Sym, Asm->OutContext)); +} - Entry.second = NextAddrPoolNumber++; - Entry.first = Sym; - return Entry.second; +unsigned DwarfUnits::getAddrPoolIndex(const MCExpr *Sym) { + std::pair<DenseMap<const MCExpr *, unsigned>::iterator, bool> P = + AddressPool.insert(std::make_pair(Sym, NextAddrPoolNumber)); + if (P.second) + ++NextAddrPoolNumber; + return P.first->second; } // Define a unique number for the abbreviation. // void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) { - // Profile the node so that we can make it unique. - FoldingSetNodeID ID; - Abbrev.Profile(ID); - // Check the set for priors. DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev); // If it's newly added. if (InSet == &Abbrev) { // Add to abbreviation list. - Abbreviations->push_back(&Abbrev); + Abbreviations.push_back(&Abbrev); // Assign the vector position + 1 as its number. - Abbrev.setNumber(Abbreviations->size()); + Abbrev.setNumber(Abbreviations.size()); } else { // Assign existing abbreviation number. Abbrev.setNumber(InSet->getNumber()); } } -// If special LLVM prefix that is used to inform the asm -// printer to not emit usual symbol prefix before the symbol name is used then -// return linkage name after skipping this special LLVM prefix. -static StringRef getRealLinkageName(StringRef LinkageName) { - char One = '\1'; - if (LinkageName.startswith(StringRef(&One, 1))) - return LinkageName.substr(1); - return LinkageName; -} - static bool isObjCClass(StringRef Name) { return Name.startswith("+") || Name.startswith("-"); } @@ -296,12 +303,7 @@ static bool isObjCClass(StringRef Name) { static bool hasObjCCategory(StringRef Name) { if (!isObjCClass(Name)) return false; - size_t pos = Name.find(')'); - if (pos != std::string::npos) { - if (Name[pos+1] != ' ') return false; - return true; - } - return false; + return Name.find(") ") != StringRef::npos; } static void getObjCClassCategory(StringRef In, StringRef &Class, @@ -321,11 +323,20 @@ static StringRef getObjCMethodName(StringRef In) { return In.slice(In.find(' ') + 1, In.find(']')); } +// Helper for sorting sections into a stable output order. +static bool SectionSort(const MCSection *A, const MCSection *B) { + std::string LA = (A ? A->getLabelBeginName() : ""); + std::string LB = (B ? B->getLabelBeginName() : ""); + return LA < LB; +} + // Add the various names to the Dwarf accelerator table names. +// TODO: Determine whether or not we should add names for programs +// that do not have a DW_AT_name or DW_AT_linkage_name field - this +// is only slightly different than the lookup of non-standard ObjC names. static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, DIE* Die) { if (!SP.isDefinition()) return; - TheCU->addAccelName(SP.getName(), Die); // If the linkage name is different than the name, go ahead and output @@ -346,30 +357,34 @@ static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, } } +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context nested inside a subprogram. +bool DwarfDebug::isSubprogramContext(const MDNode *Context) { + if (!Context) + return false; + DIDescriptor D(Context); + if (D.isSubprogram()) + return true; + if (D.isType()) + return isSubprogramContext(resolve(DIType(Context).getContext())); + return false; +} + // Find DIE for the given subprogram and attach appropriate DW_AT_low_pc // and DW_AT_high_pc attributes. If there are global variables in this // scope then create and insert DIEs for these variables. -DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, - const MDNode *SPNode) { - DIE *SPDie = SPCU->getDIE(SPNode); +DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { + DIE *SPDie = SPCU->getDIE(SP); assert(SPDie && "Unable to find subprogram DIE!"); - DISubprogram SP(SPNode); // If we're updating an abstract DIE, then we will be adding the children and // object pointer later on. But what we don't want to do is process the // concrete DIE twice. - DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode); - if (AbsSPDIE) { - bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie()); + if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { // Pick up abstract subprogram DIE. - SPDie = new DIE(dwarf::DW_TAG_subprogram); - // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of - // DW_FORM_ref4. - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, - AbsSPDIE); - SPCU->addDie(SPDie); + SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, AbsSPDIE); } else { DISubprogram SPDecl = SP.getFunctionDeclaration(); if (!SPDecl.isSubprogram()) { @@ -378,32 +393,31 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, // function then gdb prefers the definition at top level and but does not // expect specification DIE in parent function. So avoid creating // specification DIE for a function defined inside a function. - if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext())) { + DIScope SPContext = resolve(SP.getContext()); + if (SP.isDefinition() && !SPContext.isCompileUnit() && + !SPContext.isFile() && + !isSubprogramContext(SPContext)) { SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. DICompositeType SPTy = SP.getType(); DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); + uint16_t SPTag = SPTy.getTag(); if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(Args.getElement(i)); + DIE *Arg = + SPCU->createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie); + DIType ATy(Args.getElement(i)); SPCU->addType(Arg, ATy); if (ATy.isArtificial()) SPCU->addFlag(Arg, dwarf::DW_AT_artificial); if (ATy.isObjectPointer()) - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, - dwarf::DW_FORM_ref4, Arg); - SPDie->addChild(Arg); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, Arg); } DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, - dwarf::DW_FORM_ref4, SPDeclDie); - SPCU->addDie(SPDie); + SPDie = + SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, SPDeclDie); } } } @@ -425,40 +439,64 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, return SPDie; } +/// Check whether we should create a DIE for the given Scope, return true +/// if we don't create a DIE (the corresponding DIE is null). +bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { + if (Scope->isAbstractScope()) + return false; + + // We don't create a DIE if there is no Range. + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); + if (Ranges.empty()) + return true; + + if (Ranges.size() > 1) + return false; + + // We don't create a DIE if we have a single Range and the end label + // is null. + SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); + MCSymbol *End = getLabelAfterInsn(RI->second); + return !End; +} + // Construct new DW_TAG_lexical_block for this scope and attach // DW_AT_low_pc/DW_AT_high_pc labels. DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { + if (isLexicalScopeDIENull(Scope)) + return 0; + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; - const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); - if (Ranges.empty()) - return 0; - - SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(); + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); + // If we have multiple ranges, emit them into the range section. if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + TheCU->addSectionOffset(ScopeDIE, dwarf::DW_AT_ranges, + DebugRangeSymbols.size() * + Asm->getDataLayout().getPointerSize()); + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); } + + // Terminate the range list. DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); return ScopeDIE; } + // Construct the address range for this DIE. + SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); MCSymbol *Start = getLabelBeforeInsn(RI->first); MCSymbol *End = getLabelAfterInsn(RI->second); - - if (End == 0) return 0; + assert(End && "End label should not be null!"); assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); @@ -473,7 +511,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, // represent this concrete inlined copy of the function. DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); assert(Ranges.empty() == false && "LexicalScope does not have instruction markers!"); @@ -487,30 +525,17 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return NULL; } - SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(); - MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); - MCSymbol *EndLabel = getLabelAfterInsn(RI->second); - - if (StartLabel == 0 || EndLabel == 0) { - llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); - } - assert(StartLabel->isDefined() && - "Invalid starting label for an inlined scope!"); - assert(EndLabel->isDefined() && - "Invalid end label for an inlined scope!"); - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); - TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, OriginDIE); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, OriginDIE); if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + TheCU->addSectionOffset(ScopeDIE, dwarf::DW_AT_ranges, + DebugRangeSymbols.size() * + Asm->getDataLayout().getPointerSize()); + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); @@ -518,31 +543,29 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); } else { + SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); + MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); + MCSymbol *EndLabel = getLabelAfterInsn(RI->second); + + if (StartLabel == 0 || EndLabel == 0) + llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); + + assert(StartLabel->isDefined() && + "Invalid starting label for an inlined scope!"); + assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!"); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel); TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel); } InlinedSubprogramDIEs.insert(OriginDIE); - // Track the start label for this inlined function. - //.debug_inlined section specification does not clearly state how - // to emit inlined scope that is split into multiple instruction ranges. - // For now, use first instruction range and emit low_pc/high_pc pair and - // corresponding .debug_inlined section entry for this pair. - DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator - I = InlineInfo.find(InlinedSP); - - if (I == InlineInfo.end()) { - InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE)); - InlinedSPNodes.push_back(InlinedSP); - } else - I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); - + // Add the call site information to the DIE. DILocation DL(Scope->getInlinedAt()); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, None, getOrCreateSourceID(DL.getFilename(), DL.getDirectory(), TheCU->getUniqueID())); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. @@ -551,42 +574,49 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return ScopeDIE; } -// Construct a DIE for this scope. -DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - if (!Scope || !Scope->getScopeNode()) - return NULL; - - DIScope DS(Scope->getScopeNode()); - // Early return to avoid creating dangling variable|scope DIEs. - if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() && - !TheCU->getDIE(DS)) - return NULL; - - SmallVector<DIE *, 8> Children; - DIE *ObjectPointer = NULL; +DIE *DwarfDebug::createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, + SmallVectorImpl<DIE*> &Children) { + DIE *ObjectPointer = NULL; // Collect arguments for current function. if (LScopes.isCurrentFunctionScope(Scope)) for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) if (DbgVariable *ArgDV = CurrentFnArguments[i]) if (DIE *Arg = - TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) { + TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) { Children.push_back(Arg); if (ArgDV->isObjectPointer()) ObjectPointer = Arg; } // Collect lexical scope children first. - const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope); + const SmallVectorImpl<DbgVariable *> &Variables =ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) if (DIE *Variable = - TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) { + TheCU->constructVariableDIE(*Variables[i], Scope->isAbstractScope())) { Children.push_back(Variable); if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; } - const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren(); + const SmallVectorImpl<LexicalScope *> &Scopes = Scope->getChildren(); for (unsigned j = 0, M = Scopes.size(); j < M; ++j) if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) Children.push_back(Nested); + return ObjectPointer; +} + +// Construct a DIE for this scope. +DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { + if (!Scope || !Scope->getScopeNode()) + return NULL; + + DIScope DS(Scope->getScopeNode()); + + SmallVector<DIE *, 8> Children; + DIE *ObjectPointer = NULL; + bool ChildrenCreated = false; + + // We try to create the scope DIE first, then the children DIEs. This will + // avoid creating un-used children then removing them later when we find out + // the scope DIE is null. DIE *ScopeDIE = NULL; if (Scope->getInlinedAt()) ScopeDIE = constructInlinedScopeDIE(TheCU, Scope); @@ -597,34 +627,49 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { // Note down abstract DIE. if (ScopeDIE) AbstractSPDies.insert(std::make_pair(DS, ScopeDIE)); - } - else - ScopeDIE = updateSubprogramScopeDIE(TheCU, DS); - } - else { + } else + ScopeDIE = updateSubprogramScopeDIE(TheCU, DISubprogram(DS)); + } else { + // Early exit when we know the scope DIE is going to be null. + if (isLexicalScopeDIENull(Scope)) + return NULL; + + // We create children here when we know the scope DIE is not going to be + // null and the children will be added to the scope DIE. + ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children); + ChildrenCreated = true; + // There is no need to emit empty lexical block DIE. std::pair<ImportedEntityMap::const_iterator, ImportedEntityMap::const_iterator> Range = std::equal_range( ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0), - CompareFirst()); + less_first()); if (Children.empty() && Range.first == Range.second) return NULL; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); - for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i) - constructImportedModuleDIE(TheCU, i->second, ScopeDIE); + assert(ScopeDIE && "Scope DIE should not be null."); + for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; + ++i) + constructImportedEntityDIE(TheCU, i->second, ScopeDIE); } - if (!ScopeDIE) return NULL; + if (!ScopeDIE) { + assert(Children.empty() && + "We create children only when the scope DIE is not null."); + return NULL; + } + if (!ChildrenCreated) + // We create children when the scope DIE is not null. + ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children); // Add children - for (SmallVector<DIE *, 8>::iterator I = Children.begin(), + for (SmallVectorImpl<DIE *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) ScopeDIE->addChild(*I); if (DS.isSubprogram() && ObjectPointer != NULL) - TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, - dwarf::DW_FORM_ref4, ObjectPointer); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, ObjectPointer); if (DS.isSubprogram()) TheCU->addPubTypes(DISubprogram(DS)); @@ -640,8 +685,10 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, StringRef DirName, unsigned CUID) { // If we use .loc in assembly, we can't separate .file entries according to // compile units. Thus all files will belong to the default compile unit. - if (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + + // FIXME: add a better feature test than hasRawTextSupport. Even better, + // extend .file to support this. + if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) CUID = 0; // If FE did not provide a file name, then assume stdin. @@ -676,14 +723,12 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, // Create new CompileUnit for the given metadata node with tag // DW_TAG_compile_unit. -CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { - DICompileUnit DIUnit(N); +CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { StringRef FN = DIUnit.getFilename(); CompilationDir = DIUnit.getDirectory(); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, - DIUnit.getLanguage(), Die, Asm, + CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, Die, DIUnit, Asm, this, &InfoHolder); FileIDCUMap[NewCU->getUniqueID()] = 0; @@ -710,31 +755,56 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { // Use a single line table if we are using .loc and generating assembly. bool UseTheFirstCU = - (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) || - (NewCU->getUniqueID() == 0); + (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) || + (NewCU->getUniqueID() == 0); - // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. For split dwarf this is - // left in the skeleton CU and so not included. - // The line table entries are not always emitted in assembly, so it - // is not okay to use line_table_start here. if (!useSplitDwarf()) { + // DW_AT_stmt_list is a offset of line number information for this + // compile unit in debug_line section. For split dwarf this is + // left in the skeleton CU and so not included. + // The line table entries are not always emitted in assembly, so it + // is not okay to use line_table_start here. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - UseTheFirstCU ? - Asm->GetTempSymbol("section_line") : LineTableStartSym); + NewCU->addSectionLabel( + Die, dwarf::DW_AT_stmt_list, + UseTheFirstCU ? Asm->GetTempSymbol("section_line") + : LineTableStartSym); else if (UseTheFirstCU) - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + NewCU->addSectionOffset(Die, dwarf::DW_AT_stmt_list, 0); else - NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - LineTableStartSym, DwarfLineSectionSym); + NewCU->addSectionDelta(Die, dwarf::DW_AT_stmt_list, + LineTableStartSym, DwarfLineSectionSym); + + // If we're using split dwarf the compilation dir is going to be in the + // skeleton CU and so we don't need to duplicate it here. + if (!CompilationDir.empty()) + NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + + // Flags to let the linker know we have emitted new style pubnames. Only + // emit it here if we don't have a skeleton CU for split dwarf. + if (GenerateGnuPubSections) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()), + DwarfGnuPubNamesSectionSym); + + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()), + DwarfGnuPubTypesSectionSym); + } } - // If we're using split dwarf the compilation dir is going to be in the - // skeleton CU and so we don't need to duplicate it here. - if (!useSplitDwarf() && !CompilationDir.empty()) - NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized); @@ -751,13 +821,17 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { InfoHolder.addUnit(NewCU); - CUMap.insert(std::make_pair(N, NewCU)); + CUMap.insert(std::make_pair(DIUnit, NewCU)); + CUDieMap.insert(std::make_pair(Die, NewCU)); return NewCU; } // Construct subprogram DIE. -void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, - const MDNode *N) { +void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { + // FIXME: We should only call this routine once, however, during LTO if a + // program is defined in multiple CUs we could end up calling it out of + // beginModule as we walk the CUs. + CompileUnit *&CURef = SPMap[N]; if (CURef) return; @@ -771,49 +845,54 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP); - // Add to map. - TheCU->insertDIE(N, SubprogramDie); - - // Add to context owner. - TheCU->addToContextOwner(SubprogramDie, SP.getContext()); - - // Expose as global, if requested. - if (GenerateDwarfPubNamesSection) - TheCU->addGlobalName(SP.getName(), SubprogramDie); + // Expose as a global name. + TheCU->addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext())); } -void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, +void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N) { - DIImportedModule Module(N); + DIImportedEntity Module(N); if (!Module.Verify()) return; if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext())) - constructImportedModuleDIE(TheCU, Module, D); + constructImportedEntityDIE(TheCU, Module, D); } -void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N, +void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N, DIE *Context) { - DIImportedModule Module(N); + DIImportedEntity Module(N); if (!Module.Verify()) return; - return constructImportedModuleDIE(TheCU, Module, Context); + return constructImportedEntityDIE(TheCU, Module, Context); } -void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, - const DIImportedModule &Module, +void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, + const DIImportedEntity &Module, DIE *Context) { assert(Module.Verify() && "Use one of the MDNode * overloads to handle invalid metadata"); assert(Context && "Should always have a context for an imported_module"); - DIE *IMDie = new DIE(dwarf::DW_TAG_imported_module); + DIE *IMDie = new DIE(Module.getTag()); TheCU->insertDIE(Module, IMDie); - DIE *NSDie = TheCU->getOrCreateNameSpace(Module.getNameSpace()); + DIE *EntityDie; + DIDescriptor Entity = Module.getEntity(); + if (Entity.isNameSpace()) + EntityDie = TheCU->getOrCreateNameSpace(DINameSpace(Entity)); + else if (Entity.isSubprogram()) + EntityDie = TheCU->getOrCreateSubprogramDIE(DISubprogram(Entity)); + else if (Entity.isType()) + EntityDie = TheCU->getOrCreateTypeDIE(DIType(Entity)); + else + EntityDie = TheCU->getDIE(Entity); unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(), Module.getContext().getDirectory(), TheCU->getUniqueID()); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber()); - TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, NSDie); + TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, None, FileID); + TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, None, Module.getLineNumber()); + TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, EntityDie); + StringRef Name = Module.getName(); + if (!Name.empty()) + TheCU->addString(IMDie, dwarf::DW_AT_name, Name); Context->addChild(IMDie); } @@ -831,6 +910,7 @@ void DwarfDebug::beginModule() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; + TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes); // Emit initial sections so we can reference labels later. emitSectionLabels(); @@ -838,16 +918,16 @@ void DwarfDebug::beginModule() { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CUNode(CU_Nodes->getOperand(i)); CompileUnit *CU = constructCompileUnit(CUNode); - DIArray ImportedModules = CUNode.getImportedModules(); - for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i) + DIArray ImportedEntities = CUNode.getImportedEntities(); + for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) ScopesWithImportedEntities.push_back(std::make_pair( - DIImportedModule(ImportedModules.getElement(i)).getContext(), - ImportedModules.getElement(i))); + DIImportedEntity(ImportedEntities.getElement(i)).getContext(), + ImportedEntities.getElement(i))); std::sort(ScopesWithImportedEntities.begin(), - ScopesWithImportedEntities.end(), CompareFirst()); + ScopesWithImportedEntities.end(), less_first()); DIArray GVs = CUNode.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU->createGlobalVariableDIE(GVs.getElement(i)); + CU->createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); DIArray SPs = CUNode.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) constructSubprogramDIE(CU, SPs.getElement(i)); @@ -859,24 +939,15 @@ void DwarfDebug::beginModule() { CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); // Emit imported_modules last so that the relevant context is already // available. - for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i) - constructImportedModuleDIE(CU, ImportedModules.getElement(i)); - // If we're splitting the dwarf out now that we've got the entire - // CU then construct a skeleton CU based upon it. - if (useSplitDwarf()) { - // This should be a unique identifier when we want to build .dwp files. - CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, 0); - // Now construct the skeleton CU associated. - constructSkeletonCU(CUNode); - } + for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) + constructImportedEntityDIE(CU, ImportedEntities.getElement(i)); } // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); // Prime section data. - SectionMap.insert(Asm->getObjFileLowering().getTextSection()); + SectionMap[Asm->getObjFileLowering().getTextSection()]; } // Attach DW_AT_inline attribute with inlined subprogram DIEs. @@ -885,21 +956,20 @@ void DwarfDebug::computeInlinedDIEs() { for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { DIE *ISP = *AI; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); } for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(), AE = AbstractSPDies.end(); AI != AE; ++AI) { DIE *ISP = AI->second; if (InlinedSubprogramDIEs.count(ISP)) continue; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); } } // Collect info for variables that were optimized out. void DwarfDebug::collectDeadVariables() { const Module *M = MMI->getModule(); - DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap; if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { @@ -907,33 +977,70 @@ void DwarfDebug::collectDeadVariables() { DIArray Subprograms = TheCU.getSubprograms(); for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { DISubprogram SP(Subprograms.getElement(i)); - if (ProcessedSPNodes.count(SP) != 0) continue; - if (!SP.Verify()) continue; - if (!SP.isDefinition()) continue; + if (ProcessedSPNodes.count(SP) != 0) + continue; + if (!SP.isSubprogram()) + continue; + if (!SP.isDefinition()) + continue; DIArray Variables = SP.getVariables(); - if (Variables.getNumElements() == 0) continue; - - LexicalScope *Scope = - new LexicalScope(NULL, DIDescriptor(SP), NULL, false); - DeadFnScopeMap[SP] = Scope; + if (Variables.getNumElements() == 0) + continue; // Construct subprogram DIE and add variables DIEs. CompileUnit *SPCU = CUMap.lookup(TheCU); assert(SPCU && "Unable to find Compile Unit!"); + // FIXME: See the comment in constructSubprogramDIE about duplicate + // subprogram DIEs. constructSubprogramDIE(SPCU, SP); - DIE *ScopeDIE = SPCU->getDIE(SP); + DIE *SPDIE = SPCU->getDIE(SP); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { DIVariable DV(Variables.getElement(vi)); - if (!DV.Verify()) continue; - DbgVariable *NewVar = new DbgVariable(DV, NULL); + if (!DV.isVariable()) + continue; + DbgVariable NewVar(DV, NULL, this); if (DIE *VariableDIE = - SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope())) - ScopeDIE->addChild(VariableDIE); + SPCU->constructVariableDIE(NewVar, false)) + SPDIE->addChild(VariableDIE); } } } } - DeleteContainerSeconds(DeadFnScopeMap); +} + +// Type Signature [7.27] and ODR Hash code. + +/// \brief Grabs the string in whichever attribute is passed in and returns +/// a reference to it. Returns "" if the attribute doesn't exist. +static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) { + DIEValue *V = Die->findAttribute(Attr); + + if (DIEString *S = dyn_cast_or_null<DIEString>(V)) + return S->getString(); + + return StringRef(""); +} + +/// Return true if the current DIE is contained within an anonymous namespace. +static bool isContainedInAnonNamespace(DIE *Die) { + DIE *Parent = Die->getParent(); + + while (Parent) { + if (Parent->getTag() == dwarf::DW_TAG_namespace && + getDIEStringAttr(Parent, dwarf::DW_AT_name) == "") + return true; + Parent = Parent->getParent(); + } + + return false; +} + +/// Test if the current CU language is C++ and that we have +/// a named type that is not contained in an anonymous namespace. +static bool shouldAddODRHash(CompileUnit *CU, DIE *Die) { + return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus && + getDIEStringAttr(Die, dwarf::DW_AT_name) != "" && + !isContainedInAnonNamespace(Die); } void DwarfDebug::finalizeModuleInfo() { @@ -943,31 +1050,102 @@ void DwarfDebug::finalizeModuleInfo() { // Attach DW_AT_inline attribute with inlined subprogram DIEs. computeInlinedDIEs(); - // Emit DW_AT_containing_type attribute to connect types with their - // vtable holding type. + // Split out type units and conditionally add an ODR tag to the split + // out type. + // FIXME: Do type splitting. + for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) { + DIE *Die = TypeUnits[i]; + DIEHash Hash; + // If we've requested ODR hashes and it's applicable for an ODR hash then + // add the ODR signature now. + // FIXME: This should be added onto the type unit, not the type, but this + // works as an intermediate stage. + if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die)) + CUMap.begin()->second->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, + dwarf::DW_FORM_data8, + Hash.computeDIEODRSignature(*Die)); + } + + // Handle anything that needs to be done on a per-cu basis. for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(), - CUE = CUMap.end(); CUI != CUE; ++CUI) { + CUE = CUMap.end(); + CUI != CUE; ++CUI) { CompileUnit *TheCU = CUI->second; + // Emit DW_AT_containing_type attribute to connect types with their + // vtable holding type. TheCU->constructContainingTypeDIEs(); + + // If we're splitting the dwarf out now that we've got the entire + // CU then construct a skeleton CU based upon it. + if (useSplitDwarf()) { + uint64_t ID = 0; + if (GenerateCUHash) { + DIEHash CUHash; + ID = CUHash.computeCUSignature(*TheCU->getCUDie()); + } + // This should be a unique identifier when we want to build .dwp files. + TheCU->addUInt(TheCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + // Now construct the skeleton CU associated. + CompileUnit *SkCU = constructSkeletonCU(TheCU); + // This should be a unique identifier when we want to build .dwp files. + SkCU->addUInt(SkCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + } } - // Compute DIE offsets and sizes. + // Compute DIE offsets and sizes. InfoHolder.computeSizeAndOffsets(); if (useSplitDwarf()) SkeletonHolder.computeSizeAndOffsets(); } void DwarfDebug::endSections() { - // Standard sections final addresses. - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end")); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection()); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end")); + // Filter labels by section. + for (size_t n = 0; n < ArangeLabels.size(); n++) { + const SymbolCU &SCU = ArangeLabels[n]; + if (SCU.Sym->isInSection()) { + // Make a note of this symbol and it's section. + const MCSection *Section = &SCU.Sym->getSection(); + if (!Section->getKind().isMetadata()) + SectionMap[Section].push_back(SCU); + } else { + // Some symbols (e.g. common/bss on mach-o) can have no section but still + // appear in the output. This sucks as we rely on sections to build + // arange spans. We can do it without, but it's icky. + SectionMap[NULL].push_back(SCU); + } + } - // End text sections. - for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) { - Asm->OutStreamer.SwitchSection(SectionMap[I]); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1)); + // Build a list of sections used. + std::vector<const MCSection *> Sections; + for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); + it++) { + const MCSection *Section = it->first; + Sections.push_back(Section); + } + + // Sort the sections into order. + // This is only done to ensure consistent output order across different runs. + std::sort(Sections.begin(), Sections.end(), SectionSort); + + // Add terminating symbols for each section. + for (unsigned ID=0;ID<Sections.size();ID++) { + const MCSection *Section = Sections[ID]; + MCSymbol *Sym = NULL; + + if (Section) { + // We can't call MCSection::getLabelEndName, as it's only safe to do so + // if we know the section name up-front. For user-created sections, the resulting + // label may not be valid to use as a label. (section names can use a greater + // set of characters on some systems) + Sym = Asm->GetTempSymbol("debug_end", ID); + Asm->OutStreamer.SwitchSection(Section); + Asm->OutStreamer.EmitLabel(Sym); + } + + // Insert a final terminator. + SectionMap[Section].push_back(SymbolCU(NULL, Sym)); } } @@ -984,6 +1162,8 @@ void DwarfDebug::endModule() { finalizeModuleInfo(); if (!useSplitDwarf()) { + emitDebugStr(); + // Emit all the DIEs into a debug info section. emitDebugInfo(); @@ -1002,15 +1182,12 @@ void DwarfDebug::endModule() { // Emit info into a debug macinfo section. emitDebugMacInfo(); - // Emit inline info. - // TODO: When we don't need the option anymore we - // can remove all of the code that this section - // depends upon. - if (useDarwinGDBCompat()) - emitDebugInlineInfo(); } else { // TODO: Fill this in for separated debug sections and separate // out information into new sections. + emitDebugStr(); + if (useSplitDwarf()) + emitDebugStrDWO(); // Emit the debug info section and compile units. emitDebugInfo(); @@ -1035,12 +1212,6 @@ void DwarfDebug::endModule() { // Emit DWO addresses. InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection()); - // Emit inline info. - // TODO: When we don't need the option anymore we - // can remove all of the code that this section - // depends upon. - if (useDarwinGDBCompat()) - emitDebugInlineInfo(); } // Emit info into the dwarf accelerator table sections. @@ -1051,20 +1222,11 @@ void DwarfDebug::endModule() { emitAccelTypes(); } - // Emit info into a debug pubnames section, if requested. - if (GenerateDwarfPubNamesSection) - emitDebugPubnames(); - - // Emit info into a debug pubtypes section. - // TODO: When we don't need the option anymore we can - // remove all of the code that adds to the table. - if (useDarwinGDBCompat()) - emitDebugPubTypes(); - - // Finally emit string information into a string table. - emitDebugStr(); - if (useSplitDwarf()) - emitDebugStrDWO(); + // Emit the pubnames and pubtypes sections if requested. + if (HasDwarfPubSections) { + emitDebugPubNames(GenerateGnuPubSections); + emitDebugPubTypes(GenerateGnuPubSections); + } // clean up. SPMap.clear(); @@ -1072,7 +1234,7 @@ void DwarfDebug::endModule() { E = CUMap.end(); I != E; ++I) delete I->second; - for (SmallVector<CompileUnit *, 1>::iterator I = SkeletonCUs.begin(), + for (SmallVectorImpl<CompileUnit *>::iterator I = SkeletonCUs.begin(), E = SkeletonCUs.end(); I != E; ++I) delete *I; @@ -1094,7 +1256,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, if (!Scope) return NULL; - AbsDbgVariable = new DbgVariable(Var, NULL); + AbsDbgVariable = new DbgVariable(Var, NULL, this); addScopeVariable(Scope, AbsDbgVariable); AbstractVariables[Var] = AbsDbgVariable; return AbsDbgVariable; @@ -1143,7 +1305,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, continue; DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second); - DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable); + DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this); RegVar->setFrameIndex(VP.first); if (!addCurrentFnArgument(MF, RegVar, Scope)) addScopeVariable(Scope, RegVar); @@ -1158,7 +1320,8 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && - MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; + (MI->getOperand(1).isImm() || + (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U)); } // Get .debug_loc entry for the instruction range starting at MI. @@ -1168,16 +1331,12 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, const MachineInstr *MI) { const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); - if (MI->getNumOperands() != 3) { - MachineLocation MLoc = Asm->getDebugValueLocation(MI); - return DotDebugLocEntry(FLabel, SLabel, MLoc, Var); - } - if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) { + assert(MI->getNumOperands() == 3); + if (MI->getOperand(0).isReg()) { MachineLocation MLoc; - // TODO: Currently an offset of 0 in a DBG_VALUE means - // we need to generate a direct register value. - // There is no way to specify an indirect value with offset 0. - if (MI->getOperand(1).getImm() == 0) + // If the second operand is an immediate, this is a + // register-indirect address. + if (!MI->getOperand(1).isImm()) MLoc.set(MI->getOperand(0).getReg()); else MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); @@ -1198,7 +1357,7 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF, SmallPtrSet<const MDNode *, 16> &Processed) { - // collection info from MMI table. + // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMMITable(MF, Processed); for (SmallVectorImpl<const MDNode*>::const_iterator @@ -1231,7 +1390,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, Processed.insert(DV); assert(MInsn->isDebugValue() && "History must begin with debug value"); DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc()); - DbgVariable *RegVar = new DbgVariable(DV, AbsVar); + DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this); if (!addCurrentFnArgument(MF, RegVar, Scope)) addScopeVariable(Scope, RegVar); if (AbsVar) @@ -1291,10 +1450,10 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.Verify() || !Processed.insert(DV)) + if (!DV || !DV.isVariable() || !Processed.insert(DV)) continue; if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, NULL)); + addScopeVariable(Scope, new DbgVariable(DV, NULL, this)); } } @@ -1388,19 +1547,19 @@ void DwarfDebug::identifyScopeMarkers() { while (!WorkList.empty()) { LexicalScope *S = WorkList.pop_back_val(); - const SmallVector<LexicalScope *, 4> &Children = S->getChildren(); + const SmallVectorImpl<LexicalScope *> &Children = S->getChildren(); if (!Children.empty()) - for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(), + for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) WorkList.push_back(*SI); if (S->isAbstractScope()) continue; - const SmallVector<InsnRange, 4> &Ranges = S->getRanges(); + const SmallVectorImpl<InsnRange> &Ranges = S->getRanges(); if (Ranges.empty()) continue; - for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { assert(RI->first && "InsnRange does not have first instruction!"); assert(RI->second && "InsnRange does not have second instruction!"); @@ -1422,7 +1581,7 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { const MDNode *Scope = getScopeNode(DL, Ctx); DISubprogram SP = getDISubprogram(Scope); - if (SP.Verify()) { + if (SP.isSubprogram()) { // Check for number of operands since the compatibility is // cheap here. if (SP->getNumOperands() > 19) @@ -1437,36 +1596,45 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { // Gather pre-function debug information. Assumes being called immediately // after the function entry point has been emitted. void DwarfDebug::beginFunction(const MachineFunction *MF) { - if (!MMI->hasDebugInfo()) return; + + // If there's no debug info for the function we're not going to do anything. + if (!MMI->hasDebugInfo()) + return; + + // Grab the lexical scopes for the function, if we don't have any of those + // then we're not going to be able to do anything. LScopes.initialize(*MF); - if (LScopes.empty()) return; + if (LScopes.empty()) + return; + + assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); + + // Make sure that each lexical scope will have a begin/end label. identifyScopeMarkers(); // Set DwarfCompileUnitID in MCContext to the Compile Unit this function - // belongs to. + // belongs to so that we add to the correct per-cu line table in the + // non-asm case. LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); - if (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) // Use a single line table if we are using .loc and generating assembly. Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); else Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); - FunctionBeginSym = Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber()); + // Emit a label for the function so that we have a beginning address. + FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionBeginSym); - assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); // LiveUserVar - Map physreg numbers to the MDNode they contain. - std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs()); + std::vector<const MDNode *> LiveUserVar(TRI->getNumRegs()); - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { bool AtBlockEntry = true; for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { @@ -1477,22 +1645,21 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Keep track of user variables. const MDNode *Var = - MI->getOperand(MI->getNumOperands() - 1).getMetadata(); + MI->getOperand(MI->getNumOperands() - 1).getMetadata(); // Variable is in a register, we need to check for clobbers. if (isDbgValueInDefinedReg(MI)) LiveUserVar[MI->getOperand(0).getReg()] = Var; // Check the history of this variable. - SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var]; + SmallVectorImpl<const MachineInstr *> &History = DbgValues[Var]; if (History.empty()) { UserVariables.push_back(Var); // The first mention of a function argument gets the FunctionBeginSym // label, so arguments are visible when breaking at function entry. DIVariable DV(Var); - if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable && - DISubprogram(getDISubprogram(DV.getContext())) - .describes(MF->getFunction())) + if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && + getDISubprogram(DV.getContext()).describes(MF->getFunction())) LabelsBeforeInsn[MI] = FunctionBeginSym; } else { // We have seen this variable before. Try to coalesce DBG_VALUEs. @@ -1502,8 +1669,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (History.size() >= 2 && Prev->isIdenticalTo(History[History.size() - 2])) { DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" - << "\t" << *Prev - << "\t" << *History[History.size() - 2] << "\n"); + << "\t" << *Prev << "\t" + << *History[History.size() - 2] << "\n"); History.pop_back(); } @@ -1514,17 +1681,15 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Previous register assignment needs to terminate at the end of // its basic block. MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); + PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) { // Drop DBG_VALUE for empty range. DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" - << "\t" << *Prev << "\n"); + << "\t" << *Prev << "\n"); History.pop_back(); - } - else { + } else if (llvm::next(PrevMBB) != PrevMBB->getParent()->end()) // Terminate after LastMI. History.push_back(LastMI); - } } } } @@ -1542,11 +1707,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Check if the instruction clobbers any registers with debug vars. for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) continue; - for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); - AI.isValid(); ++AI) { + for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); AI.isValid(); + ++AI) { unsigned Reg = *AI; const MDNode *Var = LiveUserVar[Reg]; if (!Var) @@ -1558,7 +1724,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { DbgValueHistoryMap::iterator HistI = DbgValues.find(Var); if (HistI == DbgValues.end()) continue; - SmallVectorImpl<const MachineInstr*> &History = HistI->second; + SmallVectorImpl<const MachineInstr *> &History = HistI->second; if (History.empty()) continue; const MachineInstr *Prev = History.back(); @@ -1580,7 +1746,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end(); I != E; ++I) { - SmallVectorImpl<const MachineInstr*> &History = I->second; + SmallVectorImpl<const MachineInstr *> &History = I->second; if (History.empty()) continue; @@ -1589,11 +1755,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { const MachineBasicBlock *PrevMBB = Prev->getParent(); MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); + PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) // Drop DBG_VALUE for empty range. History.pop_back(); - else { + else if (PrevMBB != &PrevMBB->getParent()->back()) { // Terminate after LastMI. History.push_back(LastMI); } @@ -1613,45 +1779,43 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Record beginning of function. if (!PrologEndLoc.isUnknown()) { - DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc, - MF->getFunction()->getContext()); - recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), - FnStartDL.getScope(MF->getFunction()->getContext()), - // We'd like to list the prologue as "not statements" but GDB behaves - // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - DWARF2_FLAG_IS_STMT); + DebugLoc FnStartDL = + getFnDebugLoc(PrologEndLoc, MF->getFunction()->getContext()); + recordSourceLine( + FnStartDL.getLine(), FnStartDL.getCol(), + FnStartDL.getScope(MF->getFunction()->getContext()), + // We'd like to list the prologue as "not statements" but GDB behaves + // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. + DWARF2_FLAG_IS_STMT); } } void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; DIVariable DV = Var->getVariable(); - if (DV.getTag() == dwarf::DW_TAG_arg_variable) { - DISubprogram Ctxt(DV.getContext()); - DIArray Variables = Ctxt.getVariables(); - // If the variable is a parameter (arg_variable) and this is an optimized - // build (the subprogram has a 'variables' list) make sure we keep the - // parameters in order. Otherwise we would produce an incorrect function - // type with parameters out of order if function parameters were used out of - // order or unused (see the call to addScopeVariable in endFunction where - // the remaining unused variables (including parameters) are added). - if (unsigned NumVariables = Variables.getNumElements()) { - // Keep the parameters at the start of the variables list. Search through - // current variable list (Vars) and the full function variable list in - // lock-step looking for this parameter in the full list to find the - // insertion point. - SmallVectorImpl<DbgVariable *>::iterator I = Vars.begin(); - unsigned j = 0; - while (I != Vars.end() && j != NumVariables && - Variables.getElement(j) != DV && - (*I)->getVariable().getTag() == dwarf::DW_TAG_arg_variable) { - if (Variables.getElement(j) == (*I)->getVariable()) - ++I; - ++j; - } - Vars.insert(I, Var); - return; + // Variables with positive arg numbers are parameters. + if (unsigned ArgNum = DV.getArgNumber()) { + // Keep all parameters in order at the start of the variable list to ensure + // function types are correct (no out-of-order parameters) + // + // This could be improved by only doing it for optimized builds (unoptimized + // builds have the right order to begin with), searching from the back (this + // would catch the unoptimized case quickly), or doing a binary search + // rather than linear search. + SmallVectorImpl<DbgVariable *>::iterator I = Vars.begin(); + while (I != Vars.end()) { + unsigned CurNum = (*I)->getVariable().getArgNumber(); + // A local (non-parameter) variable has been found, insert immediately + // before it. + if (CurNum == 0) + break; + // A later indexed parameter has been found, insert immediately before it. + if (CurNum > ArgNum) + break; + ++I; } + Vars.insert(I, Var); + return; } Vars.push_back(Var); @@ -1681,12 +1845,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { for (unsigned i = 0, e = AList.size(); i != e; ++i) { LexicalScope *AScope = AList[i]; DISubprogram SP(AScope->getScopeNode()); - if (SP.Verify()) { + if (SP.isSubprogram()) { // Collect info for variables that were optimized out. DIArray Variables = SP.getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.Verify() || !ProcessedVars.insert(DV)) + if (!DV || !DV.isVariable() || !ProcessedVars.insert(DV)) continue; // Check that DbgVariable for DV wasn't created earlier, when // findAbstractVariable() was called for inlined instance of DV. @@ -1695,7 +1859,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { if (AbstractVariables.lookup(CleanDV)) continue; if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, NULL)); + addScopeVariable(Scope, new DbgVariable(DV, NULL, this)); } } if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0) @@ -1707,11 +1871,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); - DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), - MMI->getFrameMoves())); - // Clear debug info - for (DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >::iterator + for (ScopeVariablesMap::iterator I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I) DeleteContainerPointers(I->second); ScopeVariables.clear(); @@ -1767,7 +1928,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, // Emit Methods //===----------------------------------------------------------------------===// -// Compute the size and offset of a DIE. +// Compute the size and offset of a DIE. The offset is relative to start of the +// CU. It returns the offset after laying out the DIE. unsigned DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Get the children. @@ -1778,7 +1940,7 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Get the abbreviation for this DIE. unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1); + const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1]; // Set DIE offset Die->setOffset(Offset); @@ -1810,21 +1972,25 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { return Offset; } -// Compute the size and offset of all the DIEs. +// Compute the size and offset for each DIE. void DwarfUnits::computeSizeAndOffsets() { - // Offset from the beginning of debug info section. - unsigned AccuOffset = 0; + // Offset from the first CU in the debug info section is 0 initially. + unsigned SecOffset = 0; + + // Iterate over each compile unit and set the size and offsets for each + // DIE within each compile unit. All offsets are CU relative. for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), E = CUs.end(); I != E; ++I) { - (*I)->setDebugInfoOffset(AccuOffset); - unsigned Offset = - sizeof(int32_t) + // Length of Compilation Unit Info - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t); // Pointer Size (in bytes) + (*I)->setDebugInfoOffset(SecOffset); + // CU-relative offset is reset to 0 here. + unsigned Offset = sizeof(int32_t) + // Length of Unit Info + (*I)->getHeaderSize(); // Unit-specific headers + + // EndOffset here is CU-relative, after laying out + // all of the CU DIE. unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset); - AccuOffset += EndOffset; + SecOffset += EndOffset; } } @@ -1849,9 +2015,16 @@ void DwarfDebug::emitSectionLabels() { DwarfLineSectionSym = emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); emitSectionSym(Asm, TLOF.getDwarfLocSection()); - if (GenerateDwarfPubNamesSection) + if (GenerateGnuPubSections) { + DwarfGnuPubNamesSectionSym = + emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection()); + DwarfGnuPubTypesSectionSym = + emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection()); + } else if (HasDwarfPubSections) { emitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); - emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); + emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); + } + DwarfStrSectionSym = emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); if (useSplitDwarf()) { @@ -1871,10 +2044,10 @@ void DwarfDebug::emitSectionLabels() { } // Recursively emits a debug information entry. -void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { +void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) { // Get the abbreviation for this DIE. unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbrevs->at(AbbrevNumber - 1); + const DIEAbbrev *Abbrev = Abbrevs[AbbrevNumber - 1]; // Emit the code (index) for the abbreviation. if (Asm->isVerbose()) @@ -1889,26 +2062,44 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { // Emit the DIE attribute values. for (unsigned i = 0, N = Values.size(); i < N; ++i) { - unsigned Attr = AbbrevData[i].getAttribute(); - unsigned Form = AbbrevData[i].getForm(); + dwarf::Attribute Attr = AbbrevData[i].getAttribute(); + dwarf::Form Form = AbbrevData[i].getForm(); assert(Form && "Too many attributes for DIE (check abbreviation)"); if (Asm->isVerbose()) Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); switch (Attr) { - case dwarf::DW_AT_abstract_origin: { + case dwarf::DW_AT_abstract_origin: + case dwarf::DW_AT_type: + case dwarf::DW_AT_friend: + case dwarf::DW_AT_specification: + case dwarf::DW_AT_import: + case dwarf::DW_AT_containing_type: { DIEEntry *E = cast<DIEEntry>(Values[i]); DIE *Origin = E->getEntry(); unsigned Addr = Origin->getOffset(); if (Form == dwarf::DW_FORM_ref_addr) { + assert(!useSplitDwarf() && "TODO: dwo files can't have relocations."); // For DW_FORM_ref_addr, output the offset from beginning of debug info // section. Origin->getOffset() returns the offset from start of the // compile unit. - DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - Addr += Holder.getCUOffset(Origin->getCompileUnit()); + CompileUnit *CU = CUDieMap.lookup(Origin->getCompileUnit()); + assert(CU && "CUDie should belong to a CU."); + Addr += CU->getDebugInfoOffset(); + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + Asm->EmitLabelPlusOffset(DwarfInfoSectionSym, Addr, + DIEEntry::getRefAddrSize(Asm)); + else + Asm->EmitLabelOffsetDifference(DwarfInfoSectionSym, Addr, + DwarfInfoSectionSym, + DIEEntry::getRefAddrSize(Asm)); + } else { + // Make sure Origin belong to the same CU. + assert(Die->getCompileUnit() == Origin->getCompileUnit() && + "The referenced DIE should belong to the same CU in ref4"); + Asm->EmitInt32(Addr); } - Asm->EmitInt32(Addr); break; } case dwarf::DW_AT_ranges: { @@ -1930,7 +2121,7 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { case dwarf::DW_AT_location: { if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) { if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - Asm->EmitLabelReference(L->getValue(), 4); + Asm->EmitSectionOffset(L->getValue(), DwarfDebugLocSectionSym); else Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); } else { @@ -1984,20 +2175,10 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, TheCU->getUniqueID())); // Emit size of content not including length itself - unsigned ContentSize = Die->getSize() + - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t); // Pointer Size (in bytes) + Asm->OutStreamer.AddComment("Length of Unit"); + Asm->EmitInt32(TheCU->getHeaderSize() + Die->getSize()); - Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); - Asm->EmitInt32(ContentSize); - Asm->OutStreamer.AddComment("DWARF version number"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), - ASectionSym); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); + TheCU->emitHeader(ASection, ASectionSym); DD->emitDIE(Die, Abbreviations); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(), @@ -2005,19 +2186,6 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, } } -/// For a given compile unit DIE, returns offset from beginning of debug info. -unsigned DwarfUnits::getCUOffset(DIE *Die) { - assert(Die->getTag() == dwarf::DW_TAG_compile_unit && - "Input DIE should be compile unit in getCUOffset."); - for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) { - CompileUnit *TheCU = *I; - if (TheCU->getCUDie() == Die) - return TheCU->getDebugInfoOffset(); - } - llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits."); -} - // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; @@ -2091,7 +2259,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { // Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2099,7 +2267,7 @@ void DwarfDebug::emitAccelNames() { const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames(); for (StringMap<std::vector<DIE*> >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector<DIE *> &Entities = GI->second; for (std::vector<DIE *>::const_iterator DI = Entities.begin(), DE = Entities.end(); DI != DE; ++DI) @@ -2120,7 +2288,7 @@ void DwarfDebug::emitAccelNames() { // Emit objective C classes and categories into a hashed accelerator table // section. void DwarfDebug::emitAccelObjC() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2128,7 +2296,7 @@ void DwarfDebug::emitAccelObjC() { const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC(); for (StringMap<std::vector<DIE*> >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector<DIE *> &Entities = GI->second; for (std::vector<DIE *>::const_iterator DI = Entities.begin(), DE = Entities.end(); DI != DE; ++DI) @@ -2148,7 +2316,7 @@ void DwarfDebug::emitAccelObjC() { // Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2156,7 +2324,7 @@ void DwarfDebug::emitAccelNamespaces() { const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace(); for (StringMap<std::vector<DIE*> >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector<DIE *> &Entities = GI->second; for (std::vector<DIE *>::const_iterator DI = Entities.begin(), DE = Entities.end(); DI != DE; ++DI) @@ -2177,11 +2345,11 @@ void DwarfDebug::emitAccelNamespaces() { // Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { std::vector<DwarfAccelTable::Atom> Atoms; - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2)); - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)); DwarfAccelTable AT(Atoms); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), @@ -2191,7 +2359,7 @@ void DwarfDebug::emitAccelTypes() { = TheCU->getAccelTypes(); for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second; for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI) @@ -2209,23 +2377,85 @@ void DwarfDebug::emitAccelTypes() { AT.Emit(Asm, SectionBegin, &InfoHolder); } -/// emitDebugPubnames - Emit visible names into a debug pubnames section. +// Public name handling. +// The format for the various pubnames: +// +// dwarf pubnames - offset/name pairs where the offset is the offset into the CU +// for the DIE that is named. +// +// gnu pubnames - offset/index value/name tuples where the offset is the offset +// into the CU and the index value is computed according to the type of value +// for the DIE that is named. +// +// For type units the offset is the offset of the skeleton DIE. For split dwarf +// it's the offset within the debug_info/debug_types dwo section, however, the +// reference in the pubname header doesn't change. + +/// computeIndexValue - Compute the gdb index value for the DIE and CU. +static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU, + DIE *Die) { + dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC; + + // We could have a specification DIE that has our most of our knowledge, + // look for that now. + DIEValue *SpecVal = Die->findAttribute(dwarf::DW_AT_specification); + if (SpecVal) { + DIE *SpecDIE = cast<DIEEntry>(SpecVal)->getEntry(); + if (SpecDIE->findAttribute(dwarf::DW_AT_external)) + Linkage = dwarf::GIEL_EXTERNAL; + } else if (Die->findAttribute(dwarf::DW_AT_external)) + Linkage = dwarf::GIEL_EXTERNAL; + + switch (Die->getTag()) { + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + return dwarf::PubIndexEntryDescriptor( + dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus + ? dwarf::GIEL_STATIC + : dwarf::GIEL_EXTERNAL); + case dwarf::DW_TAG_typedef: + case dwarf::DW_TAG_base_type: + case dwarf::DW_TAG_subrange_type: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC); + case dwarf::DW_TAG_namespace: + return dwarf::GIEK_TYPE; + case dwarf::DW_TAG_subprogram: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage); + case dwarf::DW_TAG_constant: + case dwarf::DW_TAG_variable: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage); + case dwarf::DW_TAG_enumerator: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, + dwarf::GIEL_STATIC); + default: + return dwarf::GIEK_NONE; + } +} + +/// emitDebugPubNames - Emit visible names into a debug pubnames section. /// -void DwarfDebug::emitDebugPubnames() { +void DwarfDebug::emitDebugPubNames(bool GnuStyle) { const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + const MCSection *PSec = + GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() + : Asm->getObjFileLowering().getDwarfPubNamesSection(); typedef DenseMap<const MDNode*, CompileUnit*> CUMapType; for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; unsigned ID = TheCU->getUniqueID(); - if (TheCU->getGlobalNames().empty()) - continue; - // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubNamesSection()); + Asm->OutStreamer.SwitchSection(PSec); + + // Emit a label so we can reference the beginning of this pubname section. + if (GnuStyle) + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubnames", + TheCU->getUniqueID())); + // Emit the header. Asm->OutStreamer.AddComment("Length of Public Names Info"); Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID), Asm->GetTempSymbol("pubnames_begin", ID), 4); @@ -2233,7 +2463,7 @@ void DwarfDebug::emitDebugPubnames() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID)); Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), @@ -2244,18 +2474,27 @@ void DwarfDebug::emitDebugPubnames() { Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), 4); + // Emit the pubnames for this compilation unit. const StringMap<DIE*> &Globals = TheCU->getGlobalNames(); for (StringMap<DIE*>::const_iterator GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); - const DIE *Entity = GI->second; + DIE *Entity = GI->second; Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); + if (GnuStyle) { + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); + Asm->OutStreamer.AddComment( + Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Asm->EmitInt8(Desc.toBits()); + } + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); } Asm->OutStreamer.AddComment("End Mark"); @@ -2264,55 +2503,78 @@ void DwarfDebug::emitDebugPubnames() { } } -void DwarfDebug::emitDebugPubTypes() { +void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { + const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + const MCSection *PSec = + GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() + : Asm->getObjFileLowering().getDwarfPubTypesSection(); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { + E = CUMap.end(); + I != E; ++I) { CompileUnit *TheCU = I->second; // Start the dwarf pubtypes section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubTypesSection()); + Asm->OutStreamer.SwitchSection(PSec); + + // Emit a label so we can reference the beginning of this pubtype section. + if (GnuStyle) + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubtypes", + TheCU->getUniqueID())); + + // Emit the header. Asm->OutStreamer.AddComment("Length of Public Types Info"); Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()), - Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4); + Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()), + Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin", - TheCU->getUniqueID())); + Asm->OutStreamer.EmitLabel( + Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID())); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DW_PUBTYPES_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), - TheCU->getUniqueID()), - DwarfInfoSectionSym); + Asm->EmitSectionOffset( + Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), + DwarfInfoSectionSym); Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), - TheCU->getUniqueID()), - Asm->GetTempSymbol(ISec->getLabelBeginName(), - TheCU->getUniqueID()), - 4); - - const StringMap<DIE*> &Globals = TheCU->getGlobalTypes(); - for (StringMap<DIE*>::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + Asm->EmitLabelDifference( + Asm->GetTempSymbol(ISec->getLabelEndName(), TheCU->getUniqueID()), + Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), 4); + + // Emit the pubtypes. + const StringMap<DIE *> &Globals = TheCU->getGlobalTypes(); + for (StringMap<DIE *>::const_iterator GI = Globals.begin(), + GE = Globals.end(); + GI != GE; ++GI) { const char *Name = GI->getKeyData(); DIE *Entity = GI->second; - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); + if (GnuStyle) { + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); + Asm->OutStreamer.AddComment( + Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Asm->EmitInt8(Desc.toBits()); + } + + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("External Name"); + // Emit the name with a terminating null byte. - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength() + 1)); } Asm->OutStreamer.AddComment("End Mark"); Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end", - TheCU->getUniqueID())); + Asm->OutStreamer.EmitLabel( + Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID())); } } @@ -2367,24 +2629,18 @@ void DwarfUnits::emitAddresses(const MCSection *AddrSection) { // Start the dwarf addr section. Asm->OutStreamer.SwitchSection(AddrSection); - // Get all of the string pool entries and put them in an array by their ID so - // we can sort them. - SmallVector<std::pair<unsigned, - std::pair<MCSymbol*, unsigned>* >, 64> Entries; + // Order the address pool entries by ID + SmallVector<const MCExpr *, 64> Entries(AddressPool.size()); - for (DenseMap<MCSymbol*, std::pair<MCSymbol*, unsigned> >::iterator - I = AddressPool.begin(), E = AddressPool.end(); + for (DenseMap<const MCExpr *, unsigned>::iterator I = AddressPool.begin(), + E = AddressPool.end(); I != E; ++I) - Entries.push_back(std::make_pair(I->second.second, &(I->second))); - - array_pod_sort(Entries.begin(), Entries.end()); + Entries[I->second] = I->first; for (unsigned i = 0, e = Entries.size(); i != e; ++i) { - // Emit a label for reference from debug information entries. - MCSymbol *Sym = Entries[i].second->first; - if (Sym) - Asm->EmitLabelReference(Entries[i].second->first, - Asm->getDataLayout().getPointerSize()); + // Emit an expression for reference from debug information entries. + if (const MCExpr *Expr = Entries[i]) + Asm->OutStreamer.EmitValue(Expr, Asm->getDataLayout().getPointerSize()); else Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize()); } @@ -2397,7 +2653,7 @@ void DwarfDebug::emitDebugStr() { Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } -// Emit visible names into a debug loc section. +// Emit locations into the debug loc section. void DwarfDebug::emitDebugLoc() { if (DotDebugLocEntries.empty()) return; @@ -2426,9 +2682,9 @@ void DwarfDebug::emitDebugLoc() { Asm->OutStreamer.EmitIntValue(0, Size); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); } else { - Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size); - Asm->OutStreamer.EmitSymbolValue(Entry.End, Size); - DIVariable DV(Entry.Variable); + Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size); + Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size); + DIVariable DV(Entry.getVariable()); Asm->OutStreamer.AddComment("Loc expr size"); MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); @@ -2448,17 +2704,18 @@ void DwarfDebug::emitDebugLoc() { Asm->EmitULEB128(Entry.getInt()); } } else if (Entry.isLocation()) { + MachineLocation Loc = Entry.getLoc(); if (!DV.hasComplexAddress()) // Regular entry. - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); else { // Complex address entry. unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { - if (Entry.Loc.getOffset()) { + if (Loc.getOffset()) { i = 2; - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); Asm->OutStreamer.AddComment("DW_OP_deref"); Asm->EmitInt8(dwarf::DW_OP_deref); Asm->OutStreamer.AddComment("DW_OP_plus_uconst"); @@ -2467,12 +2724,12 @@ void DwarfDebug::emitDebugLoc() { } else { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); - Asm->EmitDwarfRegOp(Loc); + MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitDwarfRegOp(TLoc, DV.isIndirect()); i = 2; } } else { - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); } // Emit remaining complex address elements. @@ -2482,7 +2739,7 @@ void DwarfDebug::emitDebugLoc() { Asm->EmitInt8(dwarf::DW_OP_plus_uconst); Asm->EmitULEB128(DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { - if (!Entry.Loc.isReg()) + if (!Loc.isReg()) Asm->EmitInt8(dwarf::DW_OP_deref); } else llvm_unreachable("unknown Opcode found in complex address"); @@ -2496,18 +2753,178 @@ void DwarfDebug::emitDebugLoc() { } } -// Emit visible names into a debug aranges section. +struct SymbolCUSorter { + SymbolCUSorter(const MCStreamer &s) : Streamer(s) {} + const MCStreamer &Streamer; + + bool operator() (const SymbolCU &A, const SymbolCU &B) { + unsigned IA = A.Sym ? Streamer.GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Streamer.GetSymbolOrder(B.Sym) : 0; + + // Symbols with no order assigned should be placed at the end. + // (e.g. section end labels) + if (IA == 0) + IA = (unsigned)(-1); + if (IB == 0) + IB = (unsigned)(-1); + return IA < IB; + } +}; + +static bool CUSort(const CompileUnit *A, const CompileUnit *B) { + return (A->getUniqueID() < B->getUniqueID()); +} + +struct ArangeSpan { + const MCSymbol *Start, *End; +}; + +// Emit a debug aranges section, containing a CU lookup for any +// address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { // Start the dwarf aranges section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfARangesSection()); + Asm->OutStreamer + .SwitchSection(Asm->getObjFileLowering().getDwarfARangesSection()); + + typedef DenseMap<CompileUnit *, std::vector<ArangeSpan> > SpansType; + + SpansType Spans; + + // Build a list of sections used. + std::vector<const MCSection *> Sections; + for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); + it++) { + const MCSection *Section = it->first; + Sections.push_back(Section); + } + + // Sort the sections into order. + // This is only done to ensure consistent output order across different runs. + std::sort(Sections.begin(), Sections.end(), SectionSort); + + // Build a set of address spans, sorted by CU. + for (size_t SecIdx=0;SecIdx<Sections.size();SecIdx++) { + const MCSection *Section = Sections[SecIdx]; + SmallVector<SymbolCU, 8> &List = SectionMap[Section]; + if (List.size() < 2) + continue; + + // Sort the symbols by offset within the section. + SymbolCUSorter sorter(Asm->OutStreamer); + std::sort(List.begin(), List.end(), sorter); + + // If we have no section (e.g. common), just write out + // individual spans for each symbol. + if (Section == NULL) { + for (size_t n = 0; n < List.size(); n++) { + const SymbolCU &Cur = List[n]; + + ArangeSpan Span; + Span.Start = Cur.Sym; + Span.End = NULL; + if (Cur.CU) + Spans[Cur.CU].push_back(Span); + } + } else { + // Build spans between each label. + const MCSymbol *StartSym = List[0].Sym; + for (size_t n = 1; n < List.size(); n++) { + const SymbolCU &Prev = List[n - 1]; + const SymbolCU &Cur = List[n]; + + // Try and build the longest span we can within the same CU. + if (Cur.CU != Prev.CU) { + ArangeSpan Span; + Span.Start = StartSym; + Span.End = Cur.Sym; + Spans[Prev.CU].push_back(Span); + StartSym = Cur.Sym; + } + } + } + } + + const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + unsigned PtrSize = Asm->getDataLayout().getPointerSize(); + + // Build a list of CUs used. + std::vector<CompileUnit *> CUs; + for (SpansType::iterator it = Spans.begin(); it != Spans.end(); it++) { + CompileUnit *CU = it->first; + CUs.push_back(CU); + } + + // Sort the CU list (again, to ensure consistent output order). + std::sort(CUs.begin(), CUs.end(), CUSort); + + // Emit an arange table for each CU we used. + for (size_t CUIdx=0;CUIdx<CUs.size();CUIdx++) { + CompileUnit *CU = CUs[CUIdx]; + std::vector<ArangeSpan> &List = Spans[CU]; + + // Emit size of content not including length itself. + unsigned ContentSize + = sizeof(int16_t) // DWARF ARange version number + + sizeof(int32_t) // Offset of CU in the .debug_info section + + sizeof(int8_t) // Pointer Size (in bytes) + + sizeof(int8_t); // Segment Size (in bytes) + + unsigned TupleSize = PtrSize * 2; + + // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. + unsigned Padding = 0; + while (((sizeof(int32_t) + ContentSize + Padding) % TupleSize) != 0) + Padding++; + + ContentSize += Padding; + ContentSize += (List.size() + 1) * TupleSize; + + // For each compile unit, write the list of spans it covers. + Asm->OutStreamer.AddComment("Length of ARange Set"); + Asm->EmitInt32(ContentSize); + Asm->OutStreamer.AddComment("DWARF Arange version number"); + Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); + Asm->OutStreamer.AddComment("Offset Into Debug Info Section"); + Asm->EmitSectionOffset( + Asm->GetTempSymbol(ISec->getLabelBeginName(), CU->getUniqueID()), + DwarfInfoSectionSym); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->EmitInt8(PtrSize); + Asm->OutStreamer.AddComment("Segment Size (in bytes)"); + Asm->EmitInt8(0); + + for (unsigned n = 0; n < Padding; n++) + Asm->EmitInt8(0xff); + + for (unsigned n = 0; n < List.size(); n++) { + const ArangeSpan &Span = List[n]; + Asm->EmitLabelReference(Span.Start, PtrSize); + + // Calculate the size as being from the span start to it's end. + if (Span.End) { + Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize); + } else { + // For symbols without an end marker (e.g. common), we + // write a single arange entry containing just that one symbol. + uint64_t Size = SymSize[Span.Start]; + if (Size == 0) + Size = 1; + + Asm->OutStreamer.EmitIntValue(Size, PtrSize); + } + } + + Asm->OutStreamer.AddComment("ARange terminator"); + Asm->OutStreamer.EmitIntValue(0, PtrSize); + Asm->OutStreamer.EmitIntValue(0, PtrSize); + } } // Emit visible names into a debug ranges section. void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfRangesSection()); + Asm->OutStreamer + .SwitchSection(Asm->getObjFileLowering().getDwarfRangesSection()); unsigned char Size = Asm->getDataLayout().getPointerSize(); for (SmallVectorImpl<const MCSymbol *>::iterator I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); @@ -2528,113 +2945,27 @@ void DwarfDebug::emitDebugMacInfo() { } } -// Emit inline info using following format. -// Section Header: -// 1. length of section -// 2. Dwarf version number -// 3. address size. -// -// Entries (one "entry" for each function that was inlined): -// -// 1. offset into __debug_str section for MIPS linkage name, if exists; -// otherwise offset into __debug_str for regular function name. -// 2. offset into __debug_str section for regular function name. -// 3. an unsigned LEB128 number indicating the number of distinct inlining -// instances for the function. -// -// The rest of the entry consists of a {die_offset, low_pc} pair for each -// inlined instance; the die_offset points to the inlined_subroutine die in the -// __debug_info section, and the low_pc is the starting address for the -// inlining instance. -void DwarfDebug::emitDebugInlineInfo() { - if (!Asm->MAI->doesDwarfUseInlineInfoSection()) - return; - - if (!FirstCU) - return; - - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfDebugInlineSection()); - - Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1), - Asm->GetTempSymbol("debug_inlined_begin", 1), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1)); - - Asm->OutStreamer.AddComment("Dwarf Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); - - for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(), - E = InlinedSPNodes.end(); I != E; ++I) { - - const MDNode *Node = *I; - DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II - = InlineInfo.find(Node); - SmallVectorImpl<InlineInfoLabels> &Labels = II->second; - DISubprogram SP(Node); - StringRef LName = SP.getLinkageName(); - StringRef Name = SP.getName(); - - Asm->OutStreamer.AddComment("MIPS linkage name"); - if (LName.empty()) - Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name), - DwarfStrSectionSym); - else - Asm->EmitSectionOffset(InfoHolder - .getStringPoolEntry(getRealLinkageName(LName)), - DwarfStrSectionSym); - - Asm->OutStreamer.AddComment("Function name"); - Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name), - DwarfStrSectionSym); - Asm->EmitULEB128(Labels.size(), "Inline count"); - - for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(), - LE = Labels.end(); LI != LE; ++LI) { - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(LI->second->getOffset()); - - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc"); - Asm->OutStreamer.EmitSymbolValue(LI->first, - Asm->getDataLayout().getPointerSize()); - } - } - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1)); -} - // DWARF5 Experimental Separate Dwarf emitters. // This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, // DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, -// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present, -// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa. -CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { - DICompileUnit DIUnit(N); - CompilationDir = DIUnit.getDirectory(); +// DW_AT_ranges_base, DW_AT_addr_base. +CompileUnit *DwarfDebug::constructSkeletonCU(const CompileUnit *CU) { DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, - DIUnit.getLanguage(), Die, Asm, - this, &SkeletonHolder); + CompileUnit *NewCU = new CompileUnit(CU->getUniqueID(), Die, CU->getNode(), + Asm, this, &SkeletonHolder); NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, - DIUnit.getSplitDebugFilename()); - - // This should be a unique identifier when we want to build .dwp files. - NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); + CU->getNode().getSplitDebugFilename()); // Relocate to the beginning of the addr_base section, else 0 for the // beginning of the one for this compile unit. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, - DwarfAddrSectionSym); + NewCU->addSectionLabel(Die, dwarf::DW_AT_GNU_addr_base, + DwarfAddrSectionSym); else - NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, - dwarf::DW_FORM_sec_offset, 0); + NewCU->addSectionOffset(Die, dwarf::DW_AT_GNU_addr_base, 0); // 2.17.1 requires that we use DW_AT_low_pc for a single entry point // into an entity. We're using 0, or a NULL label for this. @@ -2644,14 +2975,47 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { // compile unit in debug_line section. // FIXME: Should handle multiple compile units. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, - DwarfLineSectionSym); + NewCU->addSectionLabel(Die, dwarf::DW_AT_stmt_list, + DwarfLineSectionSym); else - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0); + NewCU->addSectionOffset(Die, dwarf::DW_AT_stmt_list, 0); if (!CompilationDir.empty()) NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + // Flags to let the linker know we have emitted new style pubnames. + if (GenerateGnuPubSections) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()), + DwarfGnuPubNamesSectionSym); + + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()), + DwarfGnuPubTypesSectionSym); + } + + // Flag if we've emitted any ranges and their location for the compile unit. + if (DebugRangeSymbols.size()) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel(Die, dwarf::DW_AT_GNU_ranges_base, + DwarfDebugRangeSectionSym); + else + NewCU->addUInt(Die, dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_data4, + 0); + } + SkeletonHolder.addUnit(NewCU); SkeletonCUs.push_back(NewCU); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 24f758d..cebac39 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -41,7 +41,6 @@ class DIEAbbrev; class DIE; class DIEBlock; class DIEEntry; -class DwarfDebug; //===----------------------------------------------------------------------===// /// \brief This class is used to record source line correspondence. @@ -63,13 +62,12 @@ public: /// \brief This struct describes location entries emitted in the .debug_loc /// section. -typedef struct DotDebugLocEntry { +class DotDebugLocEntry { + // Begin and end symbols for the address range that this location is valid. const MCSymbol *Begin; const MCSymbol *End; - MachineLocation Loc; - const MDNode *Variable; - bool Merged; - bool Constant; + + // Type of entry that this represents. enum EntryType { E_Location, E_Integer, @@ -83,23 +81,42 @@ typedef struct DotDebugLocEntry { const ConstantFP *CFP; const ConstantInt *CIP; } Constants; - DotDebugLocEntry() - : Begin(0), End(0), Variable(0), Merged(false), - Constant(false) { Constants.Int = 0;} + + // The location in the machine frame. + MachineLocation Loc; + + // The variable to which this location entry corresponds. + const MDNode *Variable; + + // Whether this location has been merged. + bool Merged; + +public: + DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) { + Constants.Int = 0; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, const MDNode *V) - : Begin(B), End(E), Loc(L), Variable(V), Merged(false), - Constant(false) { Constants.Int = 0; EntryKind = E_Location; } + : Begin(B), End(E), Loc(L), Variable(V), Merged(false) { + Constants.Int = 0; + EntryKind = E_Location; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i) - : Begin(B), End(E), Variable(0), Merged(false), - Constant(true) { Constants.Int = i; EntryKind = E_Integer; } + : Begin(B), End(E), Variable(0), Merged(false) { + Constants.Int = i; + EntryKind = E_Integer; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr) - : Begin(B), End(E), Variable(0), Merged(false), - Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; } + : Begin(B), End(E), Variable(0), Merged(false) { + Constants.CFP = FPtr; + EntryKind = E_ConstantFP; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr) - : Begin(B), End(E), Variable(0), Merged(false), - Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; } + : Begin(B), End(E), Variable(0), Merged(false) { + Constants.CIP = IPtr; + EntryKind = E_ConstantInt; + } /// \brief Empty entries are also used as a trigger to emit temp label. Such /// labels are referenced is used to find debug_loc offset for a given DIE. @@ -115,10 +132,14 @@ typedef struct DotDebugLocEntry { bool isInt() const { return EntryKind == E_Integer; } bool isConstantFP() const { return EntryKind == E_ConstantFP; } bool isConstantInt() const { return EntryKind == E_ConstantInt; } - int64_t getInt() { return Constants.Int; } - const ConstantFP *getConstantFP() { return Constants.CFP; } - const ConstantInt *getConstantInt() { return Constants.CIP; } -} DotDebugLocEntry; + int64_t getInt() const { return Constants.Int; } + const ConstantFP *getConstantFP() const { return Constants.CFP; } + const ConstantInt *getConstantInt() const { return Constants.CIP; } + const MDNode *getVariable() const { return Variable; } + const MCSymbol *getBeginSym() const { return Begin; } + const MCSymbol *getEndSym() const { return End; } + MachineLocation getLoc() const { return Loc; } +}; //===----------------------------------------------------------------------===// /// \brief This class is used to track local variable information. @@ -129,11 +150,12 @@ class DbgVariable { DbgVariable *AbsVar; // Corresponding Abstract variable, if any. const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. int FrameIndex; + DwarfDebug *DD; public: // AbsVar may be NULL. - DbgVariable(DIVariable V, DbgVariable *AV) + DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD) : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0), - FrameIndex(~0) {} + FrameIndex(~0), DD(DD) {} // Accessors. DIVariable getVariable() const { return Var; } @@ -148,7 +170,7 @@ public: int getFrameIndex() const { return FrameIndex; } void setFrameIndex(int FI) { FrameIndex = FI; } // Translate tag to proper Dwarf tag. - unsigned getTag() const { + uint16_t getTag() const { if (Var.getTag() == dwarf::DW_TAG_arg_variable) return dwarf::DW_TAG_formal_parameter; @@ -172,32 +194,27 @@ public: } bool variableHasComplexAddress() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); + assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.hasComplexAddress(); } bool isBlockByrefVariable() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); + assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.isBlockByrefVariable(); } unsigned getNumAddrElements() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); + assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.getNumAddrElements(); } uint64_t getAddrElement(unsigned i) const { return Var.getAddrElement(i); } DIType getType() const; -}; - -// A String->Symbol mapping of strings used by indirect -// references. -typedef StringMap<std::pair<MCSymbol*, unsigned>, - BumpPtrAllocator&> StrPool; - -// A Symbol->pair<Symbol, unsigned> mapping of addresses used by indirect -// references. -typedef DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> > AddrPool; +private: + /// resolve - Look in the DwarfDebug map for the MDNode that + /// corresponds to the reference. + template <typename T> T resolve(DIRef<T> Ref) const; +}; /// \brief Collects and handles information specific to a particular /// collection of units. @@ -209,27 +226,34 @@ class DwarfUnits { FoldingSet<DIEAbbrev> *AbbreviationsSet; // A list of all the unique abbreviations in use. - std::vector<DIEAbbrev *> *Abbreviations; + std::vector<DIEAbbrev *> &Abbreviations; // A pointer to all units in the section. SmallVector<CompileUnit *, 1> CUs; // Collection of strings for this unit and assorted symbols. + // A String->Symbol mapping of strings used by indirect + // references. + typedef StringMap<std::pair<MCSymbol*, unsigned>, + BumpPtrAllocator&> StrPool; StrPool StringPool; unsigned NextStringPoolNumber; std::string StringPref; // Collection of addresses for this unit and assorted labels. + // A Symbol->unsigned mapping of addresses used by indirect + // references. + typedef DenseMap<const MCExpr *, unsigned> AddrPool; AddrPool AddressPool; unsigned NextAddrPoolNumber; public: DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS, - std::vector<DIEAbbrev *> *A, const char *Pref, - BumpPtrAllocator &DA) : - Asm(AP), AbbreviationsSet(AS), Abbreviations(A), - StringPool(DA), NextStringPoolNumber(0), StringPref(Pref), - AddressPool(), NextAddrPoolNumber(0) {} + std::vector<DIEAbbrev *> &A, const char *Pref, + BumpPtrAllocator &DA) + : Asm(AP), AbbreviationsSet(AS), Abbreviations(A), StringPool(DA), + NextStringPoolNumber(0), StringPref(Pref), AddressPool(), + NextAddrPoolNumber(0) {} /// \brief Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); @@ -245,14 +269,15 @@ public: /// \brief Emit all of the units to the section listed with the given /// abbreviation section. - void emitUnits(DwarfDebug *, const MCSection *, const MCSection *, - const MCSymbol *); + void emitUnits(DwarfDebug *DD, const MCSection *USection, + const MCSection *ASection, const MCSymbol *ASectionSym); /// \brief Emit all of the strings to the section given. - void emitStrings(const MCSection *, const MCSection *, const MCSymbol *); + void emitStrings(const MCSection *StrSection, const MCSection *OffsetSection, + const MCSymbol *StrSecSym); /// \brief Emit all of the addresses to the section given. - void emitAddresses(const MCSection *); + void emitAddresses(const MCSection *AddrSection); /// \brief Returns the entry into the start of the pool. MCSymbol *getStringPoolSym(); @@ -270,14 +295,18 @@ public: /// \brief Returns the index into the address pool with the given /// label/symbol. - unsigned getAddrPoolIndex(MCSymbol *); + unsigned getAddrPoolIndex(const MCExpr *Sym); + unsigned getAddrPoolIndex(const MCSymbol *Sym); /// \brief Returns the address pool. AddrPool *getAddrPool() { return &AddressPool; } +}; - /// \brief for a given compile unit DIE, returns offset from beginning of - /// debug info. - unsigned getCUOffset(DIE *Die); +/// \brief Helper used to pair up a symbol and its DWARF compile unit. +struct SymbolCU { + SymbolCU(CompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} + const MCSymbol *Sym; + CompileUnit *CU; }; /// \brief Collects and handles dwarf debug information. @@ -291,10 +320,7 @@ class DwarfDebug { // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - //===--------------------------------------------------------------------===// - // Attribute used to construct specific Dwarf sections. - // - + // Handle to the a compile unit used for the inline extension handling. CompileUnit *FirstCU; // Maps MDNode with its corresponding CompileUnit. @@ -303,6 +329,14 @@ class DwarfDebug { // Maps subprogram MDNode with its corresponding CompileUnit. DenseMap <const MDNode *, CompileUnit *> SPMap; + // Maps a CU DIE with its corresponding CompileUnit. + DenseMap <const DIE *, CompileUnit *> CUDieMap; + + /// Maps MDNodes for type sysstem with the corresponding DIEs. These DIEs can + /// be shared across CUs, that is why we keep the map here instead + /// of in CompileUnit. + DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap; + // Used to uniquely define abbreviations. FoldingSet<DIEAbbrev> AbbreviationsSet; @@ -315,10 +349,17 @@ class DwarfDebug { // separated by a zero byte, mapped to a unique id. StringMap<unsigned, BumpPtrAllocator&> SourceIdMap; + // List of all labels used in aranges generation. + std::vector<SymbolCU> ArangeLabels; + + // Size of each symbol emitted (for those symbols that have a specific size). + DenseMap <const MCSymbol *, uint64_t> SymSize; + // Provides a unique id per text section. - SetVector<const MCSection*> SectionMap; + typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType; + SectionMapType SectionMap; - // List of Arguments (DbgValues) for current function. + // List of arguments for current function. SmallVector<DbgVariable *, 8> CurrentFnArguments; LexicalScopes LScopes; @@ -327,7 +368,9 @@ class DwarfDebug { DenseMap<const MDNode *, DIE *> AbstractSPDies; // Collection of dbg variables of a scope. - DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables; + typedef DenseMap<LexicalScope *, + SmallVector<DbgVariable *, 8> > ScopeVariablesMap; + ScopeVariablesMap ScopeVariables; // Collection of abstract variables. DenseMap<const MDNode *, DbgVariable *> AbstractVariables; @@ -339,12 +382,6 @@ class DwarfDebug { // as DW_AT_inline. SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; - // Keep track of inlined functions and their location. This - // information is used to populate the debug_inlined section. - typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; - DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo; - SmallVector<const MDNode *, 4> InlinedSPNodes; - // This is a collection of subprogram MDNodes that are processed to // create DIEs. SmallPtrSet<const MDNode *, 16> ProcessedSPNodes; @@ -377,16 +414,6 @@ class DwarfDebug { // body. DebugLoc PrologEndLoc; - struct FunctionDebugFrameInfo { - unsigned Number; - std::vector<MachineMove> Moves; - - FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M) - : Number(Num), Moves(M) {} - }; - - std::vector<FunctionDebugFrameInfo> DebugFrames; - // Section Symbols: these are assembler temporary labels that are emitted at // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. @@ -395,9 +422,10 @@ class DwarfDebug { MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; + MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; // As an optimization, there is no need to emit an entry in the directory - // table for the same directory as DW_at_comp_dir. + // table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; // Counter for assigning globally unique IDs for CUs. @@ -409,8 +437,19 @@ class DwarfDebug { // Holders for the various debug information flags that we might need to // have exposed. See accessor functions below for description. - // Whether or not we're emitting info for older versions of gdb on darwin. - bool IsDarwinGDBCompat; + // Holder for imported entities. + typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> + ImportedEntityMap; + ImportedEntityMap ScopesWithImportedEntities; + + // Holder for types that are going to be extracted out into a type unit. + std::vector<DIE *> TypeUnits; + + // Whether to emit the pubnames/pubtypes sections. + bool HasDwarfPubSections; + + // Version of dwarf we're emitting. + unsigned DwarfVersion; // DWARF5 Experimental Options bool HasDwarfAccelTables; @@ -433,9 +472,8 @@ class DwarfDebug { // Holder for the skeleton information. DwarfUnits SkeletonHolder; - typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> - ImportedEntityMap; - ImportedEntityMap ScopesWithImportedEntities; + // Maps from a type identifier to the actual MDNode. + DITypeIdentifierMap TypeIdentifierMap; private: @@ -448,11 +486,14 @@ private: /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global /// variables in this scope then create and insert DIEs for these /// variables. - DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode); + DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP); /// \brief Construct new DW_TAG_lexical_block for this scope and /// attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + /// A helper function to check whether the DIE for a given Scope is going + /// to be null. + bool isLexicalScopeDIENull(LexicalScope *Scope); /// \brief This scope represents inlined body of a function. Construct /// DIE to represent this concrete inlined copy of the function. @@ -460,6 +501,9 @@ private: /// \brief Construct a DIE for this scope. DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + /// A helper function to create children of a Scope DIE. + DIE *createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, + SmallVectorImpl<DIE*> &Children); /// \brief Emit initial Dwarf sections with a label at the start of each one. void emitSectionLabels(); @@ -511,10 +555,16 @@ private: void emitAccelTypes(); /// \brief Emit visible names into a debug pubnames section. - void emitDebugPubnames(); + /// \param GnuStyle determines whether or not we want to emit + /// additional information into the table ala newer gcc for gdb + /// index. + void emitDebugPubNames(bool GnuStyle = false); /// \brief Emit visible types into a debug pubtypes section. - void emitDebugPubTypes(); + /// \param GnuStyle determines whether or not we want to emit + /// additional information into the table ala newer gcc for gdb + /// index. + void emitDebugPubTypes(bool GnuStyle = false); /// \brief Emit visible names into a debug str section. void emitDebugStr(); @@ -538,7 +588,7 @@ private: /// \brief Construct the split debug info compile unit for the debug info /// section. - CompileUnit *constructSkeletonCU(const MDNode *); + CompileUnit *constructSkeletonCU(const CompileUnit *CU); /// \brief Emit the local split abbreviations. void emitSkeletonAbbrevs(const MCSection *); @@ -554,21 +604,21 @@ private: /// \brief Create new CompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - CompileUnit *constructCompileUnit(const MDNode *N); + CompileUnit *constructCompileUnit(DICompileUnit DIUnit); /// \brief Construct subprogram DIE. void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N); - /// \brief Construct import_module DIE. - void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N); + /// \brief Construct imported_module or imported_declaration DIE. + void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N); /// \brief Construct import_module DIE. - void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N, + void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N, DIE *Context); /// \brief Construct import_module DIE. - void constructImportedModuleDIE(CompileUnit *TheCU, - const DIImportedModule &Module, + void constructImportedEntityDIE(CompileUnit *TheCU, + const DIImportedEntity &Module, DIE *Context); /// \brief Register a source line with debug info. Returns the unique @@ -616,7 +666,13 @@ public: // Main entry points. // DwarfDebug(AsmPrinter *A, Module *M); - ~DwarfDebug(); + + void insertDIE(const MDNode *TypeMD, DIE *Die) { + MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); + } + DIE *getDIE(const MDNode *TypeMD) { + return MDTypeNodeToDieMap.lookup(TypeMD); + } /// \brief Emit all Dwarf sections that should come prior to the /// content. @@ -637,6 +693,17 @@ public: /// \brief Process end of an instruction. void endInstruction(const MachineInstr *MI); + /// \brief Add a DIE to the set of types that we're going to pull into + /// type units. + void addTypeUnitType(DIE *Die) { TypeUnits.push_back(Die); } + + /// \brief Add a label so that arange data can be generated for it. + void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } + + /// \brief For symbols that have a size designated (e.g. common symbols), + /// this tracks that size. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) { SymSize[Sym] = Size;} + /// \brief Look up the source id with the given directory and source file /// names. If none currently exists, create a new id and insert it in the /// SourceIds map. @@ -644,11 +711,7 @@ public: unsigned CUID); /// \brief Recursively Emits a debug information entry. - void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs); - - /// \brief Returns whether or not to limit some of our debug - /// output to the limitations of darwin gdb. - bool useDarwinGDBCompat() { return IsDarwinGDBCompat; } + void emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs); // Experimental DWARF5 features. @@ -659,6 +722,19 @@ public: /// \brief Returns whether or not to change the current debug info for the /// split dwarf proposal support. bool useSplitDwarf() { return HasSplitDwarf; } + + /// Returns the Dwarf Version. + unsigned getDwarfVersion() const { return DwarfVersion; } + + /// Find the MDNode for the given reference. + template <typename T> T resolve(DIRef<T> Ref) const { + return Ref.resolve(TypeIdentifierMap); + } + + /// isSubprogramContext - Return true if Context is either a subprogram + /// or another context nested inside a subprogram. + bool isSubprogramContext(const MDNode *Context); + }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 74b1b13..1575161 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -23,13 +23,13 @@ namespace llvm { template <typename T> class SmallVectorImpl; struct LandingPadInfo; class MachineModuleInfo; -class MachineMove; class MachineInstr; class MachineFunction; class MCAsmInfo; class MCExpr; class MCSymbol; class Function; +class ARMTargetStreamer; class AsmPrinter; //===----------------------------------------------------------------------===// @@ -178,6 +178,8 @@ public: class ARMException : public DwarfException { void EmitTypeInfos(unsigned TTypeEncoding); + ARMTargetStreamer &getTargetStreamer(); + public: //===--------------------------------------------------------------------===// // Main entry points. diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index 4a99184..24aa1ab 100644 --- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -26,18 +26,20 @@ using namespace llvm; namespace { class BasicTTI : public ImmutablePass, public TargetTransformInfo { - const TargetLoweringBase *TLI; + const TargetMachine *TM; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } + public: - BasicTTI() : ImmutablePass(ID), TLI(0) { + BasicTTI() : ImmutablePass(ID), TM(0) { llvm_unreachable("This pass cannot be directly constructed"); } - BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) { + BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) { initializeBasicTTIPass(*PassRegistry::getPassRegistry()); } @@ -63,6 +65,8 @@ public: return this; } + virtual bool hasBranchDivergence() const; + /// \name Scalar TTI Implementations /// @{ @@ -71,11 +75,16 @@ public: virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) const; + virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const; virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; virtual bool isTypeLegal(Type *Ty) const; virtual unsigned getJumpBufAlignment() const; virtual unsigned getJumpBufSize() const; virtual bool shouldBuildLookupTables() const; + virtual bool haveFastSqrt(Type *Ty) const; + virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; /// @} @@ -103,7 +112,8 @@ public: virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, ArrayRef<Type*> Tys) const; virtual unsigned getNumberOfParts(Type *Tp) const; - virtual unsigned getAddressComputationCost(Type *Ty) const; + virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const; + virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) const; /// @} }; @@ -115,17 +125,18 @@ INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", char BasicTTI::ID = 0; ImmutablePass * -llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) { - return new BasicTTI(TLI); +llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) { + return new BasicTTI(TM); } +bool BasicTTI::hasBranchDivergence() const { return false; } bool BasicTTI::isLegalAddImmediate(int64_t imm) const { - return TLI->isLegalAddImmediate(imm); + return getTLI()->isLegalAddImmediate(imm); } bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { - return TLI->isLegalICmpImmediate(imm); + return getTLI()->isLegalICmpImmediate(imm); } bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, @@ -136,32 +147,52 @@ bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, AM.BaseOffs = BaseOffset; AM.HasBaseReg = HasBaseReg; AM.Scale = Scale; - return TLI->isLegalAddressingMode(AM, Ty); + return getTLI()->isLegalAddressingMode(AM, Ty); +} + +int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const { + TargetLoweringBase::AddrMode AM; + AM.BaseGV = BaseGV; + AM.BaseOffs = BaseOffset; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Scale; + return getTLI()->getScalingFactorCost(AM, Ty); } bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const { - return TLI->isTruncateFree(Ty1, Ty2); + return getTLI()->isTruncateFree(Ty1, Ty2); } bool BasicTTI::isTypeLegal(Type *Ty) const { - EVT T = TLI->getValueType(Ty); - return TLI->isTypeLegal(T); + EVT T = getTLI()->getValueType(Ty); + return getTLI()->isTypeLegal(T); } unsigned BasicTTI::getJumpBufAlignment() const { - return TLI->getJumpBufAlignment(); + return getTLI()->getJumpBufAlignment(); } unsigned BasicTTI::getJumpBufSize() const { - return TLI->getJumpBufSize(); + return getTLI()->getJumpBufSize(); } bool BasicTTI::shouldBuildLookupTables() const { + const TargetLoweringBase *TLI = getTLI(); return TLI->supportJumpTables() && (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } +bool BasicTTI::haveFastSqrt(Type *Ty) const { + const TargetLoweringBase *TLI = getTLI(); + EVT VT = TLI->getValueType(Ty); + return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); +} + +void BasicTTI::getUnrollingPreferences(Loop *, UnrollingPreferences &) const { } + //===----------------------------------------------------------------------===// // // Calls used by the vectorizers. @@ -199,6 +230,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, OperandValueKind) const { // Check if any of the operands are vector operands. + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -245,6 +277,7 @@ unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -338,6 +371,7 @@ unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const { + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -382,7 +416,7 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { assert(!Src->isVoidTy() && "Invalid type"); - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); // Assume that all loads of legal types cost 1. return LT.first; @@ -420,15 +454,23 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::log10: ISD = ISD::FLOG10; break; case Intrinsic::log2: ISD = ISD::FLOG2; break; case Intrinsic::fabs: ISD = ISD::FABS; break; + case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; case Intrinsic::floor: ISD = ISD::FFLOOR; break; case Intrinsic::ceil: ISD = ISD::FCEIL; break; case Intrinsic::trunc: ISD = ISD::FTRUNC; break; + case Intrinsic::nearbyint: + ISD = ISD::FNEARBYINT; break; case Intrinsic::rint: ISD = ISD::FRINT; break; + case Intrinsic::round: ISD = ISD::FROUND; break; case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return 0; } + const TargetLoweringBase *TLI = getTLI(); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { @@ -462,10 +504,24 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, } unsigned BasicTTI::getNumberOfParts(Type *Tp) const { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp); return LT.first; } -unsigned BasicTTI::getAddressComputationCost(Type *Ty) const { +unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { return 0; } + +unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwise) const { + assert(Ty->isVectorTy() && "Expect a vector type"); + unsigned NumVecElts = Ty->getVectorNumElements(); + unsigned NumReduxLevels = Log2_32(NumVecElts); + unsigned ArithCost = NumReduxLevels * + TopTTI->getArithmeticInstrCost(Opcode, Ty); + // Assume the pairwise shuffles add a cost. + unsigned ShuffleCost = + NumReduxLevels * (IsPairwise + 1) * + TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty); + return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); +} diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index f8cc3b3..9cd4208 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -135,8 +135,8 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { if (!I->isImplicitDef()) break; unsigned Reg = I->getOperand(0).getReg(); - ImpDefRegs.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) ImpDefRegs.insert(*SubRegs); ++I; } @@ -406,7 +406,8 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, /// MBB so that the part before the iterator falls into the part starting at the /// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1) { + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB) { if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) return 0; @@ -414,7 +415,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Create the fall-through block. MachineFunction::iterator MBBI = &CurMBB; - MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock()); + MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); // Move all the successors of this block to the specified block. @@ -647,6 +648,7 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist /// only of the common tail. Create a block that does by splitting one. bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, unsigned &commonTailIndex) { commonTailIndex = 0; @@ -676,7 +678,12 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " << maxCommonTailLength); - MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + // If the split block unconditionally falls-thru to SuccBB, it will be + // merged. In control flow terms it should then take SuccBB's name. e.g. If + // SuccBB is an inner loop, the common tail is still part of the inner loop. + const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ? + SuccBB->getBasicBlock() : MBB->getBasicBlock(); + MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB); if (!newMBB) { DEBUG(dbgs() << "... failed!"); return false; @@ -784,7 +791,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - if (!CreateCommonTailOnlyBlock(PredBB, + if (!CreateCommonTailOnlyBlock(PredBB, SuccBB, maxCommonTailLength, commonTailIndex)) { RemoveBlocksWithHash(CurHash, SuccBB, PredBB); continue; diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h index df795df..0d15ed7 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -1,4 +1,4 @@ -//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===// +//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -100,13 +100,15 @@ namespace llvm { void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest); MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1); + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB); unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB); void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, unsigned &commonTailIndex); diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index 38ae17d..4925c4d 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -9,13 +9,12 @@ #define DEBUG_TYPE "calcspillweights" -#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -23,36 +22,22 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -char CalculateSpillWeights::ID = 0; -INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights", - "Calculate spill weights", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights", - "Calculate spill weights", false, false) - -void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired<LiveIntervals>(); - au.addRequired<MachineLoopInfo>(); - au.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(au); -} - -bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { - +void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, + MachineFunction &MF, + const MachineLoopInfo &MLI, + const MachineBlockFrequencyInfo &MBFI, + VirtRegAuxInfo::NormalizingFn norm) { DEBUG(dbgs() << "********** Compute Spill Weights **********\n" << "********** Function: " << MF.getName() << '\n'); - LiveIntervals &LIS = getAnalysis<LiveIntervals>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>()); + VirtRegAuxInfo VRAI(MF, LIS, MLI, MBFI, norm); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) continue; - VRAI.CalculateWeightAndHint(LIS.getInterval(Reg)); + VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg)); } - return false; } // Return the preferred allocation register for reg, given a COPY instruction. @@ -107,12 +92,12 @@ static bool isRematerializable(const LiveInterval &LI, return true; } -void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { +void +VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); MachineBasicBlock *mbb = 0; MachineLoop *loop = 0; - unsigned loopDepth = 0; bool isExiting = false; float totalWeight = 0; SmallPtrSet<MachineInstr*, 8> visited; @@ -140,14 +125,14 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (mi->getParent() != mbb) { mbb = mi->getParent(); loop = Loops.getLoopFor(mbb); - loopDepth = loop ? loop->getLoopDepth() : 0; isExiting = loop ? loop->isLoopExiting(mbb) : false; } // Calculate instr weight. bool reads, writes; tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); - weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); + weight = LiveIntervals::getSpillWeight( + writes, reads, MBFI.getBlockFreq(mi->getParent())); // Give extra weight to what looks like a loop induction variable update. if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) @@ -198,5 +183,5 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo())) totalWeight *= 0.5F; - li.weight = normalizeSpillWeight(totalWeight, li.getSize()); + li.weight = normalize(totalWeight, li.getSize()); } diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index 75f4b96..fcfc9dc 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -24,7 +24,7 @@ using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, - const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs, + const TargetMachine &tm, SmallVectorImpl<CCValAssign> &locs, LLVMContext &C) : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm), TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index c641991..7430c53 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -22,7 +22,6 @@ using namespace llvm; void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); - initializeCalculateSpillWeightsPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandPostRAPass(Registry); @@ -60,7 +59,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackProtectorPass(Registry); initializeStackColoringPass(Registry); initializeStackSlotColoringPass(Registry); - initializeStrongPHIEliminationPass(Registry); initializeTailDuplicatePassPass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 0eb74a4..18c8e0a 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -201,8 +201,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (MO.isUse() && Special) { if (!KeepRegs.test(Reg)) { - KeepRegs.set(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) KeepRegs.set(*SubRegs); } } @@ -361,7 +361,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVector<unsigned, 2> &Forbid) + SmallVectorImpl<unsigned> &Forbid) { ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC); for (unsigned i = 0; i != Order.size(); ++i) { @@ -388,7 +388,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, continue; // If NewReg overlaps any of the forbidden registers, we can't use it. bool Forbidden = false; - for (SmallVector<unsigned, 2>::iterator it = Forbid.begin(), + for (SmallVectorImpl<unsigned>::iterator it = Forbid.begin(), ite = Forbid.end(); it != ite; ++it) if (TRI->regsOverlap(NewReg, *it)) { Forbidden = true; diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h index df13dd3..565d20b 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h @@ -103,7 +103,7 @@ class TargetRegisterInfo; unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVector<unsigned, 2> &Forbid); + SmallVectorImpl<unsigned> &Forbid); }; } diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp index 840a101..6619bcf 100644 --- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -160,7 +160,8 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator EndItr) { assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); VLIWScheduler->startBlock(MBB); - VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size()); + VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, + std::distance(BeginItr, EndItr)); VLIWScheduler->schedule(); // Generate MI -> SU map. diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index a54217f..5efe1ff 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -154,11 +154,11 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - LivePhysRegs.reset(Reg); // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. - for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR) + for (MCSubRegIterator SR(Reg, TRI,/*IncludeSelf=*/true); + SR.isValid(); ++SR) LivePhysRegs.reset(*SR); } } else if (MO.isRegMask()) { diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index f27ec77..c7c1752 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -33,7 +33,6 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { const TargetMachine *TM; - const TargetLoweringBase *TLI; // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; @@ -43,9 +42,8 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - DwarfEHPrepare(const TargetMachine *tm) : - FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), - RewindFunction(0) { + DwarfEHPrepare(const TargetMachine *TM) : + FunctionPass(ID), TM(TM), RewindFunction(0) { initializeDominatorTreePass(*PassRegistry::getPassRegistry()); } @@ -61,8 +59,8 @@ namespace { char DwarfEHPrepare::ID = 0; -FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { - return new DwarfEHPrepare(tm); +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { + return new DwarfEHPrepare(TM); } /// GetExceptionObject - Return the exception object from the value passed into @@ -108,20 +106,18 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { /// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { - bool UsesNewEH = false; SmallVector<ResumeInst*, 16> Resumes; for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (ResumeInst *RI = dyn_cast<ResumeInst>(TI)) Resumes.push_back(RI); - else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) - UsesNewEH = II->getUnwindDest()->isLandingPad(); } if (Resumes.empty()) - return UsesNewEH; + return false; // Find the rewind function if we didn't already. + const TargetLowering *TLI = TM->getTargetLowering(); if (!RewindFunction) { LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index 9b0e76f..031f19c 100644 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -23,6 +23,7 @@ #define DEBUG_TYPE "execution-fix" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Allocator.h" @@ -91,7 +92,7 @@ struct DomainValue { // First domain available. unsigned getFirstDomain() const { - return CountTrailingZeros_32(AvailableDomains); + return countTrailingZeros(AvailableDomains); } DomainValue() : Refs(0) { clear(); } @@ -136,6 +137,12 @@ class ExeDepsFix : public MachineFunctionPass { typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap; LiveOutMap LiveOuts; + /// List of undefined register reads in this block in forward order. + std::vector<std::pair<MachineInstr*, unsigned> > UndefReads; + + /// Storage for register unit liveness. + LiveRegUnits LiveUnits; + /// Current instruction number. /// The first instruction in each basic block is 0. int CurInstr; @@ -185,6 +192,8 @@ private: void processDefs(MachineInstr*, bool Kill); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); + bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); + void processUndefReads(MachineBasicBlock*); }; } @@ -341,6 +350,10 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Reset instruction counter in each basic block. CurInstr = 0; + // Set up UndefReads to track undefined register reads. + UndefReads.clear(); + LiveUnits.clear(); + // Set up LiveRegs to represent registers entering MBB. if (!LiveRegs) LiveRegs = new LiveReg[NumRegs]; @@ -448,10 +461,46 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { processDefs(MI, !DomP.first); } +/// \brief Return true to if it makes sense to break dependence on a partial def +/// or undef use. +bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, + unsigned Pref) { + int rx = regIndex(MI->getOperand(OpIdx).getReg()); + if (rx < 0) + return false; + + unsigned Clearance = CurInstr - LiveRegs[rx].Def; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + return true; + } + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK .\n"); + return false; + } + // A def from an unprocessed back-edge may make us break this dependency. + DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); + return false; +} + // Update def-ages for registers defined by MI. // If Kill is set, also kill off DomainValues clobbered by the defs. +// +// Also break dependencies on partial defs and undef uses. void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { assert(!MI->isDebugValue() && "Won't process debug values"); + + // Break dependence on undef uses. Do this before updating LiveRegs below. + unsigned OpNum; + unsigned Pref = TII->getUndefRegClearance(MI, OpNum, TRI); + if (Pref) { + if (shouldBreakDependence(MI, OpNum, Pref)) + UndefReads.push_back(std::make_pair(MI, OpNum)); + } const MCInstrDesc &MCID = MI->getDesc(); for (unsigned i = 0, e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); @@ -471,37 +520,58 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr << '\t' << *MI); + // Check clearance before partial register updates. + // Call breakDependence before setting LiveRegs[rx].Def. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (Pref && shouldBreakDependence(MI, i, Pref)) + TII->breakPartialRegDependency(MI, i, TRI); + // How many instructions since rx was last written? - unsigned Clearance = CurInstr - LiveRegs[rx].Def; LiveRegs[rx].Def = CurInstr; // Kill off domains redefined by generic instructions. if (Kill) kill(rx); + } + ++CurInstr; +} - // Verify clearance before partial register updates. - unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); - if (!Pref) - continue; - DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); - if (Pref > Clearance) { - DEBUG(dbgs() << ": Break dependency.\n"); - TII->breakPartialRegDependency(MI, i, TRI); - continue; - } - - // The current clearance seems OK, but we may be ignoring a def from a - // back-edge. - if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { - DEBUG(dbgs() << ": OK.\n"); - continue; - } +/// \break Break false dependencies on undefined register reads. +/// +/// Walk the block backward computing precise liveness. This is expensive, so we +/// only do it on demand. Note that the occurrence of undefined register reads +/// that should be broken is very rare, but when they occur we may have many in +/// a single block. +void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { + if (UndefReads.empty()) + return; - // A def from an unprocessed back-edge may make us break this dependency. - DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); + // Collect this block's live out register units. + LiveUnits.init(TRI); + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + LiveUnits.addLiveIns(*SI, *TRI); } + MachineInstr *UndefMI = UndefReads.back().first; + unsigned OpIdx = UndefReads.back().second; - ++CurInstr; + for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend(); + I != E; ++I) { + // Update liveness, including the current instrucion's defs. + LiveUnits.stepBackward(*I, *TRI); + + if (UndefMI == &*I) { + if (!LiveUnits.contains(UndefMI->getOperand(OpIdx).getReg(), *TRI)) + TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); + + UndefReads.pop_back(); + if (UndefReads.empty()) + return; + + UndefMI = UndefReads.back().first; + OpIdx = UndefReads.back().second; + } + } } // A hard instruction only works in one domain. All input registers will be @@ -549,7 +619,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Is it possible to use this collapsed register for free? if (dv->isCollapsed()) { // Restrict available domains to the ones in common with the operand. - // If there are no common domains, we must pay the cross-domain + // If there are no common domains, we must pay the cross-domain // penalty for this operand. if (common) available = common; } else if (common) @@ -564,7 +634,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // If the collapsed operands force a single domain, propagate the collapse. if (isPowerOf2_32(available)) { - unsigned domain = CountTrailingZeros_32(available); + unsigned domain = countTrailingZeros(available); TII->setExecutionDomain(mi, domain); visitHardInstr(mi, domain); return; @@ -573,7 +643,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. SmallVector<LiveReg, 4> Regs; - for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { + for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. @@ -583,7 +653,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { } // Sorted insertion. bool Inserted = false; - for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end(); + for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end(); i != e && !Inserted; ++i) { if (LR.Def < i->Def) { Inserted = true; @@ -614,7 +684,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { continue; // If latest didn't merge, it is useless now. Kill all registers using it. - for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) + for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) if (LiveRegs[*i].Value == Latest) kill(*i); } @@ -686,6 +756,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) visitInstr(I); + processUndefReads(MBB); leaveBasicBlock(MBB); } @@ -698,6 +769,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { ++I) if (!I->isDebugValue()) processDefs(I, false); + processUndefReads(MBB); leaveBasicBlock(MBB); } @@ -713,6 +785,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { delete[] FI->second; } LiveOuts.clear(); + UndefReads.clear(); Avail.clear(); Allocator.DestroyAll(); diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 1611db8..6c73fff 100644 --- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -104,7 +104,7 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } if (DstSubReg == InsReg) { - // No need to insert an identify copy instruction. + // No need to insert an identity copy instruction. // Watch out for case like this: // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3 // We must leave %RAX live. diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index 8264d6d..e2d0eb4 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -22,6 +22,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -31,6 +33,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; // Hidden options for help debugging. @@ -150,14 +154,17 @@ namespace { /// BBAnalysis - Results of if-conversion feasibility analysis indexed by /// basic block number. std::vector<BBInfo> BBAnalysis; + TargetSchedModel SchedModel; const TargetLoweringBase *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - const InstrItineraryData *InstrItins; const MachineBranchProbabilityInfo *MBPI; MachineRegisterInfo *MRI; + LiveRegUnits Redefs; + LiveRegUnits DontKill; + bool PreRegAlloc; bool MadeChange; int FnNum; @@ -198,11 +205,9 @@ namespace { void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, SmallSet<unsigned, 4> *LaterRedefs = 0); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, bool IgnoreBr = false); void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); @@ -267,7 +272,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MRI = &MF.getRegInfo(); - InstrItins = MF.getTarget().getInstrItineraryData(); + + const TargetSubtargetInfo &ST = + MF.getTarget().getSubtarget<TargetSubtargetInfo>(); + SchedModel.init(*ST.getSchedModel(), &ST, TII); + if (!TII) return false; PreRegAlloc = MRI->isSSA(); @@ -666,32 +675,28 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool isPredicated = TII->isPredicated(I); bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); - if (!isCondBr) { - if (!isPredicated) { - BBI.NonPredSize++; - unsigned ExtraPredCost = 0; - unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, - &ExtraPredCost); - if (NumCycles > 1) - BBI.ExtraCost += NumCycles-1; - BBI.ExtraCost2 += ExtraPredCost; - } else if (!AlreadyPredicated) { - // FIXME: This instruction is already predicated before the - // if-conversion pass. It's probably something like a conditional move. - // Mark this block unpredicable for now. - BBI.IsUnpredicable = true; - return; - } + // A conditional branch is not predicable, but it may be eliminated. + if (isCondBr) + continue; + + if (!isPredicated) { + BBI.NonPredSize++; + unsigned ExtraPredCost = TII->getPredicationCost(&*I); + unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; + } else if (!AlreadyPredicated) { + // FIXME: This instruction is already predicated before the + // if-conversion pass. It's probably something like a conditional move. + // Mark this block unpredicable for now. + BBI.IsUnpredicable = true; + return; } if (BBI.ClobbersPred && !isPredicated) { // Predicate modification instruction should end the block (except for // already predicated instructions and end of block branches). - if (isCondBr) { - // A conditional branch is not predicable, but it may be eliminated. - continue; - } - // Predicate may have been modified, the subsequent (currently) // unpredicated instructions cannot be correctly predicated. BBI.IsUnpredicable = true; @@ -720,9 +725,9 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, if (BBI.IsDone || BBI.IsUnpredicable) return false; - // If it is already predicated, check if its predicate subsumes the new - // predicate. - if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred)) + // If it is already predicated, check if the new predicate subsumes + // its predicate. + if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) return false; if (BBI.BrCond.size()) { @@ -961,64 +966,58 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); } -/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are -/// modeled as read + write (sort like two-address instructions). These -/// routines track register liveness and add implicit uses to if-converted -/// instructions to conform to the model. -static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs, - const TargetRegisterInfo *TRI) { - for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), - E = BB->livein_end(); I != E; ++I) { - unsigned Reg = *I; - Redefs.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Redefs.insert(*SubRegs); - } -} - -static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, - const TargetRegisterInfo *TRI, - bool AddImpUse = false) { - SmallVector<unsigned, 4> Defs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) +/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all +/// values defined in MI which are not live/used by MI. +static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs, + const TargetRegisterInfo *TRI) { + for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { + if (!Ops->isReg() || !Ops->isKill()) continue; - unsigned Reg = MO.getReg(); - if (!Reg) + unsigned Reg = Ops->getReg(); + if (Reg == 0) continue; - if (MO.isDef()) - Defs.push_back(Reg); - else if (MO.isKill()) { - Redefs.erase(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Redefs.erase(*SubRegs); - } + Redefs.removeReg(Reg, *TRI); } - MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Reg = Defs[i]; - if (!Redefs.insert(Reg)) { - if (AddImpUse) - // Treat predicated update as read + write. - MIB.addReg(Reg, RegState::Implicit | RegState::Undef); - } else { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Redefs.insert(*SubRegs); - } + for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { + if (!Ops->isReg() || !Ops->isDef()) + continue; + unsigned Reg = Ops->getReg(); + if (Reg == 0 || Redefs.contains(Reg, *TRI)) + continue; + Redefs.addReg(Reg, *TRI); + + MachineOperand &Op = *Ops; + MachineInstr *MI = Op.getParent(); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); + MIB.addReg(Reg, RegState::Implicit | RegState::Undef); } } -static void UpdatePredRedefs(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator E, - SmallSet<unsigned,4> &Redefs, - const TargetRegisterInfo *TRI) { - while (I != E) { - UpdatePredRedefs(I, Redefs, TRI); - ++I; +/** + * Remove kill flags from operands with a registers in the @p DontKill set. + */ +static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill, + const MCRegisterInfo &MCRI) { + for (MIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->isKill()) + continue; + if (DontKill.contains(O->getReg(), MCRI)) + O->setIsKill(false); } } +/** + * Walks a range of machine instructions and removes kill flags for registers + * in the @p DontKill set. + */ +static void RemoveKills(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + const LiveRegUnits &DontKill, + const MCRegisterInfo &MCRI) { + for ( ; I != E; ++I) + RemoveKills(*I, DontKill, MCRI); +} + /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. /// bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { @@ -1049,21 +1048,27 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentiall redefined by // predicated instructions. - SmallSet<unsigned, 4> Redefs; - InitPredRedefs(CvtBBI->BB, Redefs, TRI); - InitPredRedefs(NextBBI->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(CvtBBI->BB, *TRI); + Redefs.addLiveIns(NextBBI->BB, *TRI); + + // Compute a set of registers which must not be killed by instructions in + // BB1: This is everything live-in to BB2. + DontKill.init(TRI); + DontKill.addLiveIns(NextBBI->BB, *TRI); if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. BBI.BB->removeSuccessor(CvtBBI->BB); } else { - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); + RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); // Merge converted block into entry block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1148,16 +1153,18 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. - SmallSet<unsigned, 4> Redefs; - InitPredRedefs(CvtBBI->BB, Redefs, TRI); - InitPredRedefs(NextBBI->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(CvtBBI->BB, *TRI); + Redefs.addLiveIns(NextBBI->BB, *TRI); + + DontKill.clear(); bool HasEarlyExit = CvtBBI->FalseBB != NULL; if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. @@ -1165,7 +1172,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); // Now merge the entry of the triangle with the true block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1276,8 +1283,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. - SmallSet<unsigned, 4> Redefs; - InitPredRedefs(BBI1->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(BBI1->BB, *TRI); // Remove the duplicated instructions at the beginnings of both paths. MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); @@ -1304,7 +1311,19 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, --NumDups1; } - UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI); + // Compute a set of registers which must not be killed by instructions in BB1: + // This is everything used+live in BB2 after the duplicated instructions. We + // can compute this set by simulating liveness backwards from the end of BB2. + DontKill.init(TRI); + for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(), + E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) { + DontKill.stepBackward(*I, *TRI); + } + + for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; + ++I) { + Redefs.stepForward(*I, *TRI); + } BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); @@ -1322,6 +1341,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } BBI1->BB->erase(DI1, BBI1->BB->end()); + // Kill flags in the true block for registers living into the false block + // must be removed. + RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); + // Remove 'false' block branch and find the last instruction to predicate. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); @@ -1362,8 +1385,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } else if (!RedefsByFalse.count(Reg)) { // These are defined before ctrl flow reach the 'false' instructions. // They cannot be modified by the 'true' instructions. - ExtUses.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) ExtUses.insert(*SubRegs); } } @@ -1371,8 +1394,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Reg = Defs[i]; if (!ExtUses.count(Reg)) { - RedefsByFalse.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) RedefsByFalse.insert(*SubRegs); } } @@ -1380,10 +1403,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } // Predicate the 'true' block. - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse); + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse); // Predicate the 'false' block. - PredicateBlock(*BBI2, DI2, *Cond2, Redefs); + PredicateBlock(*BBI2, DI2, *Cond2); // Merge the true block into the entry of the diamond. MergeBlocks(BBI, *BBI1, TailBB == 0); @@ -1458,7 +1481,6 @@ static bool MaySpeculate(const MachineInstr *MI, void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, SmallSet<unsigned, 4> *LaterRedefs) { bool AnyUnpred = false; bool MaySpec = LaterRedefs != 0; @@ -1484,7 +1506,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(I, Redefs, TRI, true); + UpdatePredRedefs(I, Redefs, TRI); } std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); @@ -1501,7 +1523,6 @@ void IfConverter::PredicateBlock(BBInfo &BBI, /// the destination block. Skip end of block branches if IgnoreBr is true. void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, bool IgnoreBr) { MachineFunction &MF = *ToBBI.BB->getParent(); @@ -1514,8 +1535,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; - unsigned ExtraPredCost = 0; - unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost); + unsigned ExtraPredCost = TII->getPredicationCost(&*I); + unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); if (NumCycles > 1) ToBBI.ExtraCost += NumCycles-1; ToBBI.ExtraCost2 += ExtraPredCost; @@ -1531,7 +1552,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(MI, Redefs, TRI, true); + UpdatePredRedefs(MI, Redefs, TRI); + + // Some kill flags may not be correct anymore. + if (!DontKill.empty()) + RemoveKills(*MI, DontKill, *TRI); } if (!IgnoreBr) { diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 35295fe..bb0e642 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "regalloc" #include "Spiller.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -21,8 +22,10 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -63,6 +66,7 @@ class InlineSpiller : public Spiller { MachineRegisterInfo &MRI; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; + const MachineBlockFrequencyInfo &MBFI; // Variables that are valid during spill(), but used by multiple methods. LiveRangeEdit *Edit; @@ -146,7 +150,8 @@ public: MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), TII(*mf.getTarget().getInstrInfo()), - TRI(*mf.getTarget().getRegisterInfo()) {} + TRI(*mf.getTarget().getRegisterInfo()), + MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {} void spill(LiveRangeEdit &); @@ -174,10 +179,8 @@ private: bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >, MachineInstr *LoadMI = 0); - void insertReload(LiveInterval &NewLI, SlotIndex, - MachineBasicBlock::iterator MI); - void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - SlotIndex, MachineBasicBlock::iterator MI); + void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI); + void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI); void spillAroundUses(unsigned Reg); void spillAll(); @@ -337,10 +340,12 @@ static raw_ostream &operator<<(raw_ostream &OS, /// propagateSiblingValue - Propagate the value in SVI to dependents if it is /// known. Otherwise remember the dependency for later. /// -/// @param SVI SibValues entry to propagate. +/// @param SVIIter SibValues entry to propagate. /// @param VNI Dependent value, or NULL to propagate to all saved dependents. -void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI, +void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, VNInfo *VNI) { + SibValueMap::value_type *SVI = &*SVIIter; + // When VNI is non-NULL, add it to SVI's deps, and only propagate to that. TinyPtrVector<VNInfo*> FirstDeps; if (VNI) { @@ -352,14 +357,12 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI, if (!SVI->second.hasDef()) return; - // Work list of values to propagate. It would be nice to use a SetVector - // here, but then we would be forced to use a SmallSet. - SmallVector<SibValueMap::iterator, 8> WorkList(1, SVI); - SmallPtrSet<VNInfo*, 8> WorkSet; + // Work list of values to propagate. + SmallSetVector<SibValueMap::value_type *, 8> WorkList; + WorkList.insert(SVI); do { SVI = WorkList.pop_back_val(); - WorkSet.erase(SVI->first); TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps; VNI = 0; @@ -450,8 +453,7 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI, continue; // Something changed in DepSVI. Propagate to dependents. - if (WorkSet.insert(DepSVI->first)) - WorkList.push_back(DepSVI); + WorkList.insert(&*DepSVI); DEBUG(dbgs() << " update " << DepSVI->first->id << '@' << DepSVI->first->def << " to:\t" << DepSV); @@ -576,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveRangeQuery SrcQ(SrcLI, VNI->def); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); assert(SrcQ.valueIn() && "Copy from non-existing value"); // Check if this COPY kills its source. SVI->second.KillsSource = SrcQ.isKill(); @@ -881,12 +883,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, } // Alocate a new register for the remat. - LiveInterval &NewLI = Edit->createFrom(Original); - NewLI.markNotSpillable(); + unsigned NewVReg = Edit->createFrom(Original); // Finally we can rematerialize OrigMI before MI. - SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, + SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewVReg, RM, TRI); + (void)DefIdx; DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *LIS.getInstructionFromIndex(DefIdx)); @@ -894,15 +896,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i].second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { - MO.setReg(NewLI.reg); + MO.setReg(NewVReg); MO.setIsKill(); } } - DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); + DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI << '\n'); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI)); - DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); ++NumRemats; return true; } @@ -1005,6 +1004,40 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { return true; } +#if !defined(NDEBUG) +// Dump the range of instructions from B to E with their slot indexes. +static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, + MachineBasicBlock::iterator E, + LiveIntervals const &LIS, + const char *const header, + unsigned VReg =0) { + char NextLine = '\n'; + char SlotIndent = '\t'; + + if (llvm::next(B) == E) { + NextLine = ' '; + SlotIndent = ' '; + } + + dbgs() << '\t' << header << ": " << NextLine; + + for (MachineBasicBlock::iterator I = B; I != E; ++I) { + SlotIndex Idx = LIS.getInstructionIndex(I).getRegSlot(); + + // If a register was passed in and this instruction has it as a + // destination that is marked as an early clobber, print the + // early-clobber slot index. + if (VReg) { + MachineOperand *MO = I->findRegisterDefOperand(VReg); + if (MO && MO->isEarlyClobber()) + Idx = Idx.getRegSlot(true); + } + + dbgs() << SlotIndent << Idx << '\t' << *I; + } +} +#endif + /// foldMemoryOperand - Try folding stack slot references in Ops into their /// instructions. /// @@ -1024,6 +1057,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, bool WasCopy = MI->isCopy(); unsigned ImpReg = 0; + bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::PATCHPOINT || + MI->getOpcode() == TargetOpcode::STACKMAP); + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; @@ -1035,7 +1071,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, continue; } // FIXME: Teach targets to deal with subregs. - if (MO.getSubReg()) + if (!SpillSubRegs && MO.getSubReg()) return false; // We cannot fold a load instruction into a def. if (LoadMI && MO.isDef()) @@ -1045,14 +1081,52 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, FoldOps.push_back(Idx); } + MachineInstrSpan MIS(MI); + MachineInstr *FoldMI = LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) : TII.foldMemoryOperand(MI, FoldOps, StackSlot); if (!FoldMI) return false; + + // Remove LIS for any dead defs in the original MI not in FoldMI. + for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || + MRI.isReserved(Reg)) { + continue; + } + MIBundleOperands::PhysRegInfo RI = + MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); + if (MO->readsReg()) { + assert(RI.Reads && "Cannot fold physreg reader"); + continue; + } + if (RI.Defines) + continue; + // FoldMI does not define this physreg. Remove the LI segment. + assert(MO->isDead() && "Cannot fold physreg def"); + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { + if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = LR->getVNInfoAt(Idx)) + LR->removeValNo(VNI); + } + } + } + LIS.ReplaceMachineInstrInMaps(MI, FoldMI); MI->eraseFromParent(); + // Insert any new instructions other than FoldMI into the LIS maps. + assert(!MIS.empty() && "Unexpected empty span of instructions!"); + for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end(); + MII != End; ++MII) + if (&*MII != FoldMI) + LIS.InsertMachineInstrInMaps(&*MII); + // TII.foldMemoryOperand may have left some implicit operands on the // instruction. Strip them. if (ImpReg) @@ -1064,8 +1138,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, FoldMI->RemoveOperand(i - 1); } - DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t' - << *FoldMI); + DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, + "folded")); + if (!WasCopy) ++NumFolded; else if (Ops.front().second == 0) @@ -1075,36 +1150,35 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, return true; } -/// insertReload - Insert a reload of NewLI.reg before MI. -void InlineSpiller::insertReload(LiveInterval &NewLI, +void InlineSpiller::insertReload(unsigned NewVReg, SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, - MRI.getRegClass(NewLI.reg), &TRI); - --MI; // Point to load instruction. - SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); - // Some (out-of-tree) targets have EC reload instructions. - if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg)) - if (MO->isEarlyClobber()) - LoadIdx = LoadIdx.getRegSlot(true); - DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); - VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); + + MachineInstrSpan MIS(MI); + TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot, + MRI.getRegClass(NewVReg), &TRI); + + LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); + + DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload", + NewVReg)); ++NumReloads; } -/// insertSpill - Insert a spill of NewLI.reg after MI. -void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - SlotIndex Idx, MachineBasicBlock::iterator MI) { +/// insertSpill - Insert a spill of NewVReg after MI. +void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, + MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, - MRI.getRegClass(NewLI.reg), &TRI); - --MI; // Point to store instruction. - SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); - DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); + + MachineInstrSpan MIS(MI); + TII.storeRegToStackSlot(MBB, llvm::next(MI), NewVReg, isKill, StackSlot, + MRI.getRegClass(NewVReg), &TRI); + + LIS.InsertMachineInstrRangeInMaps(llvm::next(MI), MIS.end()); + + DEBUG(dumpMachineInstrRangeWithSlotIndex(llvm::next(MI), MIS.end(), LIS, + "spill")); ++NumSpills; } @@ -1120,18 +1194,14 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. - uint64_t Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->isIndirectDebugValue(); + uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *MDPtr = MI->getOperand(2).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot, - Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - } else { - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - MI->eraseFromParent(); - } + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(StackSlot).addImm(Offset).addMetadata(MDPtr); continue; } @@ -1184,19 +1254,18 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { if (foldMemoryOperand(Ops)) continue; - // Allocate interval around instruction. + // Create a new virtual register for spill/fill. // FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = Edit->createFrom(Reg); - NewLI.markNotSpillable(); + unsigned NewVReg = Edit->createFrom(Reg); if (RI.Reads) - insertReload(NewLI, Idx, MI); + insertReload(NewVReg, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); - MO.setReg(NewLI.reg); + MO.setReg(NewVReg); if (MO.isUse()) { if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) MO.setIsKill(); @@ -1205,21 +1274,12 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { hasLiveDef = true; } } - DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); + DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); // FIXME: Use a second vreg if instruction has no tied ops. - if (RI.Writes) { + if (RI.Writes) if (hasLiveDef) - insertSpill(NewLI, OldLI, Idx, MI); - else { - // This instruction defines a dead value. We don't need to spill it, - // but do create a live range for the dead value. - VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI)); - } - } - - DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + insertSpill(NewVReg, true, MI); } } @@ -1238,8 +1298,8 @@ void InlineSpiller::spillAll() { assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]), - StackInt->getValNumInfo(0)); + StackInt->MergeSegmentsInAsValue(LIS.getInterval(RegsToSpill[i]), + StackInt->getValNumInfo(0)); DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); // Spill around uses of all RegsToSpill. @@ -1280,8 +1340,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { DEBUG(dbgs() << "Inline spilling " << MRI.getRegClass(edit.getReg())->getName() - << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent() - << "\nFrom original " << LIS.getInterval(Original) << '\n'); + << ':' << edit.getParent() + << "\nFrom original " << PrintReg(Original) << '\n'); assert(edit.getParent().isSpillable() && "Attempting to spill already spilled value."); assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); @@ -1294,5 +1354,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { if (!RegsToSpill.empty()) spillAll(); - Edit->calculateRegClassAndHint(MF, Loops); + Edit->calculateRegClassAndHint(MF, Loops, MBFI); } diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp index a8e711e..427225d 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp @@ -204,11 +204,11 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { // Fixed interference. for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { LiveInterval::iterator &I = RegUnits[i].FixedI; - LiveInterval *LI = RegUnits[i].Fixed; - if (I == LI->end() || I->start >= Stop) + LiveRange *LR = RegUnits[i].Fixed; + if (I == LR->end() || I->start >= Stop) continue; - I = LI->advanceTo(I, Stop); - bool Backup = I == LI->end() || I->start >= Stop; + I = LR->advanceTo(I, Stop); + bool Backup = I == LR->end() || I->start >= Stop; if (Backup) --I; SlotIndex StopI = I->end; diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h index c02fb9a..800f705 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.h +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h @@ -72,7 +72,7 @@ class InterferenceCache { unsigned VirtTag; /// Fixed interference in RegUnit. - LiveInterval *Fixed; + LiveRange *Fixed; /// Iterator pointing into the fixed RegUnit interference. LiveInterval::iterator FixedI; diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index d894f66..c38d4fb 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -485,11 +485,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - Type *IntPtr = TD.getIntPtrType(Context); + Value *Op0 = CI->getArgOperand(0); + Type *IntPtr = TD.getIntPtrType(Op0->getType()); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getArgOperand(0); + Ops[0] = Op0; // Extend the amount to i32. Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context), diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 1a09837..ad2c553 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -62,6 +62,17 @@ static bool getVerboseAsm() { llvm_unreachable("Invalid verbose asm state"); } +void LLVMTargetMachine::initAsmInfo() { + AsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), TargetTriple); + // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, + // and if the old one gets included then MCAsmInfo will be NULL and + // we'll crash later. + // Provide the user with a useful error message about what's wrong. + assert(AsmInfo && "MCAsmInfo not initialized. " + "Make sure you include the correct TargetSelect.h" + "and that InitializeAllTargetMCs() is being invoked!"); +} + LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, StringRef CPU, StringRef FS, TargetOptions Options, @@ -69,18 +80,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, CodeGenOpt::Level OL) : TargetMachine(T, Triple, CPU, FS, Options) { CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); - AsmInfo = T.createMCAsmInfo(Triple); - // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, - // and if the old one gets included then MCAsmInfo will be NULL and - // we'll crash later. - // Provide the user with a useful error message about what's wrong. - assert(AsmInfo && "MCAsmInfo not initialized." - "Make sure you include the correct TargetSelect.h" - "and that InitializeAllTargetMCs() is being invoked!"); } void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { - PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createBasicTargetTransformInfoPass(this)); } /// addPassesToX helper drives creation and initialization of TargetPassConfig. @@ -112,7 +115,6 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(), &TM->getTargetLowering()->getObjFileLowering()); PM.add(MMI); - MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref. // Set up a MachineFunction for the rest of CodeGen to work on. PM.add(new MachineFunctionAnalysis(*TM)); @@ -131,7 +133,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassConfig->setInitialized(); - return Context; + return &MMI->getContext(); } bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, @@ -161,6 +163,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, const MCAsmInfo &MAI = *getMCAsmInfo(); const MCRegisterInfo &MRI = *getRegisterInfo(); + const MCInstrInfo &MII = *getInstrInfo(); const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); OwningPtr<MCStreamer> AsmStreamer; @@ -168,19 +171,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, - *getInstrInfo(), - Context->getRegisterInfo(), STI); + MII, MRI, STI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; - MCAsmBackend *MAB = 0; - if (ShowMCEncoding) { - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, - *Context); - MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); - } + if (ShowMCEncoding) + MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), + TargetCPU); MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, getVerboseAsm(), hasMCUseLoc(), @@ -195,9 +194,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, - STI, *Context); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, + *Context); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (MCE == 0 || MAB == 0) return true; @@ -232,7 +231,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// addPassesToEmitMachineCode - Add passes to the specified pass manager to /// get machine code emitted. This uses a JITCodeEmitter object to handle /// actually outputting the machine code and resolving things like the address -/// of functions. This method should returns true if machine code emission is +/// of functions. This method should return true if machine code emission is /// not supported. /// bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, @@ -271,7 +270,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, *Ctx); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), + TargetCPU); if (MCE == 0 || MAB == 0) return true; diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index 8172154..ffe407a 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -212,15 +212,15 @@ LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { /// constructScopeNest void LexicalScopes::constructScopeNest(LexicalScope *Scope) { - assert (Scope && "Unable to calculate scop edominance graph!"); + assert (Scope && "Unable to calculate scope dominance graph!"); SmallVector<LexicalScope *, 4> WorkStack; WorkStack.push_back(Scope); unsigned Counter = 0; while (!WorkStack.empty()) { LexicalScope *WS = WorkStack.back(); - const SmallVector<LexicalScope *, 4> &Children = WS->getChildren(); + const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren(); bool visitedChildren = false; - for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(), + for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) { LexicalScope *ChildScope = *SI; if (!ChildScope->getDFSOut()) { @@ -279,8 +279,8 @@ getMachineBasicBlocks(DebugLoc DL, return; } - SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges(); - for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(), + SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges(); + for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(), E = InsnRanges.end(); I != E; ++I) { InsnRange &R = *I; MBBs.insert(R.first->getParent()); diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 0b117ac..25645e0 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -108,6 +108,7 @@ class LDVImpl; class UserValue { const MDNode *variable; ///< The debug info variable we are part of. unsigned offset; ///< Byte offset into variable. + bool IsIndirect; ///< true if this is a register-indirect+offset value. DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. UserValue *leader; ///< Equivalence class leader. @@ -130,13 +131,15 @@ class UserValue { /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs /// is live. Returns true if any changes were made. - bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); public: /// UserValue - Create a new UserValue. - UserValue(const MDNode *var, unsigned o, DebugLoc L, + UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L, LocMap::Allocator &alloc) - : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc) + : variable(var), offset(o), IsIndirect(i), dl(L), leader(this), + next(0), locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. @@ -217,13 +220,13 @@ public: /// End points where VNI is no longer live are added to Kills. /// @param Idx Starting point for the definition. /// @param LocNo Location number to propagate. - /// @param LI Restrict liveness to where LI has the value VNI. May be null. - /// @param VNI When LI is not null, this is the value to restrict to. + /// @param LR Restrict liveness to where LR has the value VNI. May be null. + /// @param VNI When LR is not null, this is the value to restrict to. /// @param Kills Append end points of VNI's live range to Kills. /// @param LIS Live intervals analysis. /// @param MDT Dominator tree. void extendDef(SlotIndex Idx, unsigned LocNo, - LiveInterval *LI, const VNInfo *VNI, + LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS); @@ -249,7 +252,8 @@ public: /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. - bool splitRegister(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + bool splitRegister(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); /// rewriteLocations - Rewrite virtual register locations according to the /// provided virtual register map. @@ -299,7 +303,8 @@ class LDVImpl { UVMap userVarMap; /// getUserValue - Find or create a UserValue. - UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL); + UserValue *getUserValue(const MDNode *Var, unsigned Offset, + bool IsIndirect, DebugLoc DL); /// lookupVirtReg - Find the EC leader for VirtReg or null. UserValue *lookupVirtReg(unsigned VirtReg); @@ -342,7 +347,7 @@ public: void mapVirtReg(unsigned VirtReg, UserValue *EC); /// splitRegister - Replace all references to OldReg with NewRegs. - void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs); /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. void emitDebugValues(VirtRegMap *VRM); @@ -414,7 +419,7 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { } UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, - DebugLoc DL) { + bool IsIndirect, DebugLoc DL) { UserValue *&Leader = userVarMap[Var]; if (Leader) { UserValue *UV = Leader->getLeader(); @@ -424,7 +429,7 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, return UV; } - UserValue *UV = new UserValue(Var, Offset, DL, allocator); + UserValue *UV = new UserValue(Var, Offset, IsIndirect, DL, allocator); userValues.push_back(UV); Leader = UserValue::merge(Leader, UV); return UV; @@ -445,15 +450,18 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { // DBG_VALUE loc, offset, variable if (MI->getNumOperands() != 3 || - !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) { + !(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) || + !MI->getOperand(2).isMetadata()) { DEBUG(dbgs() << "Can't handle " << *MI); return false; } // Get or create the UserValue for (variable,offset). - unsigned Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->isIndirectDebugValue(); + unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *Var = MI->getOperand(2).getMetadata(); - UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc()); + //here. + UserValue *UV = getUserValue(Var, Offset, IsIndirect, MI->getDebugLoc()); UV->addDef(Idx, MI->getOperand(0)); return true; } @@ -487,7 +495,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { } void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, - LiveInterval *LI, const VNInfo *VNI, + LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS) { @@ -501,15 +509,15 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, // Limit to VNI's live range. bool ToEnd = true; - if (LI && VNI) { - LiveRange *Range = LI->getLiveRangeContaining(Start); - if (!Range || Range->valno != VNI) { + if (LR && VNI) { + LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); + if (!Segment || Segment->valno != VNI) { if (Kills) Kills->push_back(Start); continue; } - if (Range->end < Stop) - Stop = Range->end, ToEnd = false; + if (Segment->end < Stop) + Stop = Segment->end, ToEnd = false; } // There could already be a short def at Start. @@ -661,10 +669,10 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, // For physregs, use the live range of the first regunit as a guide. unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI); - LiveInterval *LI = &LIS.getRegUnit(Unit); - const VNInfo *VNI = LI->getVNInfoAt(Idx); + LiveRange *LR = &LIS.getRegUnit(Unit); + const VNInfo *VNI = LR->getVNInfoAt(Idx); // Don't track copies from physregs, it is too expensive. - extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS); + extendDef(Idx, LocNo, LR, VNI, 0, LIS, MDT, UVS); } // Finally, erase all the undefs. @@ -724,7 +732,8 @@ LiveDebugVariables::~LiveDebugVariables() { //===----------------------------------------------------------------------===// bool -UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { +UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals& LIS) { DEBUG({ dbgs() << "Splitting Loc" << OldLocNo << '\t'; print(dbgs(), 0); @@ -733,7 +742,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { LocMap::iterator LocMapI; LocMapI.setMap(locInts); for (unsigned i = 0; i != NewRegs.size(); ++i) { - LiveInterval *LI = NewRegs[i]; + LiveInterval *LI = &LIS.getInterval(NewRegs[i]); if (LI->empty()) continue; @@ -822,7 +831,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { } bool -UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS) { bool DidChange = false; // Split locations referring to OldReg. Iterate backwards so splitLocation can // safely erase unused locations. @@ -831,15 +841,15 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { const MachineOperand *Loc = &locations[LocNo]; if (!Loc->isReg() || Loc->getReg() != OldReg) continue; - DidChange |= splitLocation(LocNo, NewRegs); + DidChange |= splitLocation(LocNo, NewRegs, LIS); } return DidChange; } -void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) { bool DidChange = false; for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) - DidChange |= UV->splitRegister(OldReg, NewRegs); + DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); if (!DidChange) return; @@ -847,11 +857,11 @@ void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { // Map all of the new virtual registers. UserValue *UV = lookupVirtReg(OldReg); for (unsigned i = 0; i != NewRegs.size(); ++i) - mapVirtReg(NewRegs[i]->reg, UV); + mapVirtReg(NewRegs[i], UV); } void LiveDebugVariables:: -splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) { if (pImpl) static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs); } @@ -921,19 +931,12 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, MachineOperand &Loc = locations[LocNo]; ++NumInsertedDebugValues; - // Frame index locations may require a target callback. - if (Loc.isFI()) { - MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(), - Loc.getIndex(), offset, variable, - findDebugLoc()); - if (MI) { - MBB->insert(I, MI); - return; - } - } - // This is not a frame index, or the target is happy with a standard FI. - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) - .addOperand(Loc).addImm(offset).addMetadata(variable); + if (Loc.isReg()) + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, Loc.getReg(), offset, variable); + else + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(Loc).addImm(offset).addMetadata(variable); } void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, @@ -992,4 +995,3 @@ void LiveDebugVariables::dump() { static_cast<LDVImpl*>(pImpl)->print(dbgs()); } #endif - diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h index 3ce3c39..58a3f0f 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h @@ -27,6 +27,7 @@ namespace llvm { class LiveInterval; +class LiveIntervals; class VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { @@ -47,7 +48,8 @@ public: /// splitRegister - Move any user variables in OldReg to the live ranges in /// NewRegs where they are live. Mark the values as unavailable where no new /// register is live. - void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes /// that happened during register allocation. diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index dccd847..2b8feb8 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -9,12 +9,12 @@ // // This file implements the LiveRange and LiveInterval classes. Given some // numbering of each the machine instructions an interval [i, j) is said to be a -// live interval for register v if there is no instruction with number j' > j +// live range for register v if there is no instruction with number j' >= j // such that v is live at j' and there is no instruction with number i' < i such -// that v is live at i'. In this implementation intervals can have holes, -// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each -// individual range is represented as an instance of LiveRange, and the whole -// interval is represented as an instance of LiveInterval. +// that v is live at i'. In this implementation ranges can have holes, +// i.e. a range might look like [1,20), [50,65), [1000,1001). Each +// individual segment is represented as an instance of LiveRange::Segment, +// and the whole range is represented as an instance of LiveRange. // //===----------------------------------------------------------------------===// @@ -31,14 +31,14 @@ #include <algorithm> using namespace llvm; -LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { +LiveRange::iterator LiveRange::find(SlotIndex Pos) { // This algorithm is basically std::upper_bound. // Unfortunately, std::upper_bound cannot be used with mixed types until we // adopt C++0x. Many libraries can do it, but not all. if (empty() || Pos >= endIndex()) return end(); iterator I = begin(); - size_t Len = ranges.size(); + size_t Len = size(); do { size_t Mid = Len >> 1; if (Pos < I[Mid].end) @@ -49,13 +49,13 @@ LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { return I; } -VNInfo *LiveInterval::createDeadDef(SlotIndex Def, - VNInfo::Allocator &VNInfoAllocator) { +VNInfo *LiveRange::createDeadDef(SlotIndex Def, + VNInfo::Allocator &VNInfoAllocator) { assert(!Def.isDead() && "Cannot define a value at the dead slot"); iterator I = find(Def); if (I == end()) { VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI)); + segments.push_back(Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } if (SlotIndex::isSameInstr(Def, I->start)) { @@ -73,11 +73,11 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, } assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI)); + segments.insert(I, Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } -// overlaps - Return true if the intersection of the two live intervals is +// overlaps - Return true if the intersection of the two live ranges is // not empty. // // An example for overlaps(): @@ -86,7 +86,7 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, // 4: B = ... // 8: C = A + B ;; last use of A // -// The live intervals should look like: +// The live ranges should look like: // // A = [3, 11) // B = [7, x) @@ -95,9 +95,9 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, // A->overlaps(C) should return false since we want to be able to join // A and C. // -bool LiveInterval::overlapsFrom(const LiveInterval& other, - const_iterator StartPos) const { - assert(!empty() && "empty interval"); +bool LiveRange::overlapsFrom(const LiveRange& other, + const_iterator StartPos) const { + assert(!empty() && "empty range"); const_iterator i = begin(); const_iterator ie = end(); const_iterator j = StartPos; @@ -108,13 +108,13 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, if (i->start < j->start) { i = std::upper_bound(i, ie, j->start); - if (i != ranges.begin()) --i; + if (i != begin()) --i; } else if (j->start < i->start) { ++StartPos; if (StartPos != other.end() && StartPos->start <= i->start) { assert(StartPos < other.end() && i < end()); j = std::upper_bound(j, je, i->start); - if (j != other.ranges.begin()) --j; + if (j != other.begin()) --j; } } else { return true; @@ -136,10 +136,9 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, return false; } -bool LiveInterval::overlaps(const LiveInterval &Other, - const CoalescerPair &CP, - const SlotIndexes &Indexes) const { - assert(!empty() && "empty interval"); +bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP, + const SlotIndexes &Indexes) const { + assert(!empty() && "empty range"); if (Other.empty()) return false; @@ -178,9 +177,9 @@ bool LiveInterval::overlaps(const LiveInterval &Other, } } -/// overlaps - Return true if the live interval overlaps a range specified +/// overlaps - Return true if the live range overlaps an interval specified /// by [Start, End). -bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { +bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const { assert(Start < End && "Invalid range"); const_iterator I = std::lower_bound(begin(), end(), End); return I != begin() && (--I)->end > Start; @@ -190,7 +189,7 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { /// ValNo is dead, remove it. If it is the largest value number, just nuke it /// (and any other deleted values neighboring it), otherwise mark it as ~1U so /// it can be nuked later. -void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { +void LiveRange::markValNoForDeletion(VNInfo *ValNo) { if (ValNo->id == getNumValNums()-1) { do { valnos.pop_back(); @@ -202,137 +201,135 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { /// RenumberValues - Renumber all values in order of appearance and delete the /// remaining unused values. -void LiveInterval::RenumberValues(LiveIntervals &lis) { +void LiveRange::RenumberValues() { SmallPtrSet<VNInfo*, 8> Seen; valnos.clear(); for (const_iterator I = begin(), E = end(); I != E; ++I) { VNInfo *VNI = I->valno; if (!Seen.insert(VNI)) continue; - assert(!VNI->isUnused() && "Unused valno used by live range"); + assert(!VNI->isUnused() && "Unused valno used by live segment"); VNI->id = (unsigned)valnos.size(); valnos.push_back(VNI); } } -/// extendIntervalEndTo - This method is used when we want to extend the range -/// specified by I to end at the specified endpoint. To do this, we should -/// merge and eliminate all ranges that this will overlap with. The iterator is -/// not invalidated. -void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { - assert(I != ranges.end() && "Not a valid interval!"); +/// This method is used when we want to extend the segment specified by I to end +/// at the specified endpoint. To do this, we should merge and eliminate all +/// segments that this will overlap with. The iterator is not invalidated. +void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) { + assert(I != end() && "Not a valid segment!"); VNInfo *ValNo = I->valno; - // Search for the first interval that we can't merge with. - Ranges::iterator MergeTo = llvm::next(I); - for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) { + // Search for the first segment that we can't merge with. + iterator MergeTo = llvm::next(I); + for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) { assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); } - // If NewEnd was in the middle of an interval, make sure to get its endpoint. + // If NewEnd was in the middle of a segment, make sure to get its endpoint. I->end = std::max(NewEnd, prior(MergeTo)->end); - // If the newly formed range now touches the range after it and if they have - // the same value number, merge the two ranges into one range. - if (MergeTo != ranges.end() && MergeTo->start <= I->end && + // If the newly formed segment now touches the segment after it and if they + // have the same value number, merge the two segments into one segment. + if (MergeTo != end() && MergeTo->start <= I->end && MergeTo->valno == ValNo) { I->end = MergeTo->end; ++MergeTo; } - // Erase any dead ranges. - ranges.erase(llvm::next(I), MergeTo); + // Erase any dead segments. + segments.erase(llvm::next(I), MergeTo); } -/// extendIntervalStartTo - This method is used when we want to extend the range -/// specified by I to start at the specified endpoint. To do this, we should -/// merge and eliminate all ranges that this will overlap with. -LiveInterval::Ranges::iterator -LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) { - assert(I != ranges.end() && "Not a valid interval!"); +/// This method is used when we want to extend the segment specified by I to +/// start at the specified endpoint. To do this, we should merge and eliminate +/// all segments that this will overlap with. +LiveRange::iterator +LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) { + assert(I != end() && "Not a valid segment!"); VNInfo *ValNo = I->valno; - // Search for the first interval that we can't merge with. - Ranges::iterator MergeTo = I; + // Search for the first segment that we can't merge with. + iterator MergeTo = I; do { - if (MergeTo == ranges.begin()) { + if (MergeTo == begin()) { I->start = NewStart; - ranges.erase(MergeTo, I); + segments.erase(MergeTo, I); return I; } assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); --MergeTo; } while (NewStart <= MergeTo->start); - // If we start in the middle of another interval, just delete a range and - // extend that interval. + // If we start in the middle of another segment, just delete a range and + // extend that segment. if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { MergeTo->end = I->end; } else { - // Otherwise, extend the interval right after. + // Otherwise, extend the segment right after. ++MergeTo; MergeTo->start = NewStart; MergeTo->end = I->end; } - ranges.erase(llvm::next(MergeTo), llvm::next(I)); + segments.erase(llvm::next(MergeTo), llvm::next(I)); return MergeTo; } -LiveInterval::iterator -LiveInterval::addRangeFrom(LiveRange LR, iterator From) { - SlotIndex Start = LR.start, End = LR.end; - iterator it = std::upper_bound(From, ranges.end(), Start); +LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) { + SlotIndex Start = S.start, End = S.end; + iterator it = std::upper_bound(From, end(), Start); - // If the inserted interval starts in the middle or right at the end of - // another interval, just extend that interval to contain the range of LR. - if (it != ranges.begin()) { + // If the inserted segment starts in the middle or right at the end of + // another segment, just extend that segment to contain the segment of S. + if (it != begin()) { iterator B = prior(it); - if (LR.valno == B->valno) { + if (S.valno == B->valno) { if (B->start <= Start && B->end >= Start) { - extendIntervalEndTo(B, End); + extendSegmentEndTo(B, End); return B; } } else { - // Check to make sure that we are not overlapping two live ranges with + // Check to make sure that we are not overlapping two live segments with // different valno's. assert(B->end <= Start && - "Cannot overlap two LiveRanges with differing ValID's" + "Cannot overlap two segments with differing ValID's" " (did you def the same reg twice in a MachineInstr?)"); } } - // Otherwise, if this range ends in the middle of, or right next to, another - // interval, merge it into that interval. - if (it != ranges.end()) { - if (LR.valno == it->valno) { + // Otherwise, if this segment ends in the middle of, or right next to, another + // segment, merge it into that segment. + if (it != end()) { + if (S.valno == it->valno) { if (it->start <= End) { - it = extendIntervalStartTo(it, Start); + it = extendSegmentStartTo(it, Start); - // If LR is a complete superset of an interval, we may need to grow its + // If S is a complete superset of a segment, we may need to grow its // endpoint as well. if (End > it->end) - extendIntervalEndTo(it, End); + extendSegmentEndTo(it, End); return it; } } else { - // Check to make sure that we are not overlapping two live ranges with + // Check to make sure that we are not overlapping two live segments with // different valno's. assert(it->start >= End && - "Cannot overlap two LiveRanges with differing ValID's"); + "Cannot overlap two segments with differing ValID's"); } } - // Otherwise, this is just a new range that doesn't interact with anything. + // Otherwise, this is just a new segment that doesn't interact with anything. // Insert it. - return ranges.insert(it, LR); + return segments.insert(it, S); } -/// extendInBlock - If this interval is live before Kill in the basic +/// extendInBlock - If this range is live before Kill in the basic /// block that starts at StartIdx, extend it to be live up to Kill and return /// the value. If there is no live range before Kill, return NULL. -VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { +VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { if (empty()) return 0; iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot()); @@ -342,20 +339,21 @@ VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { if (I->end <= StartIdx) return 0; if (I->end < Kill) - extendIntervalEndTo(I, Kill); + extendSegmentEndTo(I, Kill); return I->valno; } -/// removeRange - Remove the specified range from this interval. Note that -/// the range must be in a single LiveRange in its entirety. -void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, - bool RemoveDeadValNo) { - // Find the LiveRange containing this span. - Ranges::iterator I = find(Start); - assert(I != ranges.end() && "Range is not in interval!"); - assert(I->containsRange(Start, End) && "Range is not entirely in interval!"); +/// Remove the specified segment from this range. Note that the segment must +/// be in a single Segment in its entirety. +void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, + bool RemoveDeadValNo) { + // Find the Segment containing this span. + iterator I = find(Start); + assert(I != end() && "Segment is not in range!"); + assert(I->containsInterval(Start, End) + && "Segment is not entirely in range!"); - // If the span we are removing is at the start of the LiveRange, adjust it. + // If the span we are removing is at the start of the Segment, adjust it. VNInfo *ValNo = I->valno; if (I->start == Start) { if (I->end == End) { @@ -373,54 +371,50 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, } } - ranges.erase(I); // Removed the whole LiveRange. + segments.erase(I); // Removed the whole Segment. } else I->start = End; return; } - // Otherwise if the span we are removing is at the end of the LiveRange, + // Otherwise if the span we are removing is at the end of the Segment, // adjust the other way. if (I->end == End) { I->end = Start; return; } - // Otherwise, we are splitting the LiveRange into two pieces. + // Otherwise, we are splitting the Segment into two pieces. SlotIndex OldEnd = I->end; - I->end = Start; // Trim the old interval. + I->end = Start; // Trim the old segment. // Insert the new one. - ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo)); + segments.insert(llvm::next(I), Segment(End, OldEnd, ValNo)); } -/// removeValNo - Remove all the ranges defined by the specified value#. +/// removeValNo - Remove all the segments defined by the specified value#. /// Also remove the value# from value# list. -void LiveInterval::removeValNo(VNInfo *ValNo) { +void LiveRange::removeValNo(VNInfo *ValNo) { if (empty()) return; - Ranges::iterator I = ranges.end(); - Ranges::iterator E = ranges.begin(); + iterator I = end(); + iterator E = begin(); do { --I; if (I->valno == ValNo) - ranges.erase(I); + segments.erase(I); } while (I != E); // Now that ValNo is dead, remove it. markValNoForDeletion(ValNo); } -/// join - Join two live intervals (this, and other) together. This applies -/// mappings to the value numbers in the LHS/RHS intervals as specified. If -/// the intervals are not joinable, this aborts. -void LiveInterval::join(LiveInterval &Other, - const int *LHSValNoAssignments, - const int *RHSValNoAssignments, - SmallVector<VNInfo*, 16> &NewVNInfo, - MachineRegisterInfo *MRI) { +void LiveRange::join(LiveRange &Other, + const int *LHSValNoAssignments, + const int *RHSValNoAssignments, + SmallVectorImpl<VNInfo *> &NewVNInfo) { verify(); - // Determine if any of our live range values are mapped. This is uncommon, so - // we want to avoid the interval scan if not. + // Determine if any of our values are mapped. This is uncommon, so we want + // to avoid the range scan if not. bool MustMapCurValNos = false; unsigned NumVals = getNumValNums(); unsigned NumNewVals = NewVNInfo.size(); @@ -433,8 +427,7 @@ void LiveInterval::join(LiveInterval &Other, } } - // If we have to apply a mapping to our base interval assignment, rewrite it - // now. + // If we have to apply a mapping to our base range assignment, rewrite it now. if (MustMapCurValNos && !empty()) { // Map the first live range. @@ -445,12 +438,12 @@ void LiveInterval::join(LiveInterval &Other, assert(nextValNo != 0 && "Huh?"); // If this live range has the same value # as its immediate predecessor, - // and if they are neighbors, remove one LiveRange. This happens when we + // and if they are neighbors, remove one Segment. This happens when we // have [0,4:0)[4,7:1) and map 0/1 onto the same value #. if (OutIt->valno == nextValNo && OutIt->end == I->start) { OutIt->end = I->end; } else { - // Didn't merge. Move OutIt to the next interval, + // Didn't merge. Move OutIt to the next segment, ++OutIt; OutIt->valno = nextValNo; if (OutIt != I) { @@ -459,9 +452,9 @@ void LiveInterval::join(LiveInterval &Other, } } } - // If we merge some live ranges, chop off the end. + // If we merge some segments, chop off the end. ++OutIt; - ranges.erase(OutIt, end()); + segments.erase(OutIt, end()); } // Rewrite Other values before changing the VNInfo ids. @@ -472,7 +465,7 @@ void LiveInterval::join(LiveInterval &Other, I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]]; // Update val# info. Renumber them and make sure they all belong to this - // LiveInterval now. Also remove dead val#'s. + // LiveRange now. Also remove dead val#'s. unsigned NumValNos = 0; for (unsigned i = 0; i < NumNewVals; ++i) { VNInfo *VNI = NewVNInfo[i]; @@ -487,31 +480,31 @@ void LiveInterval::join(LiveInterval &Other, if (NumNewVals < NumVals) valnos.resize(NumNewVals); // shrinkify - // Okay, now insert the RHS live ranges into the LHS. + // Okay, now insert the RHS live segments into the LHS. LiveRangeUpdater Updater(this); for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) Updater.add(*I); } -/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live -/// interval as the specified value number. The LiveRanges in RHS are -/// allowed to overlap with LiveRanges in the current interval, but only if -/// the overlapping LiveRanges have the specified value number. -void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, - VNInfo *LHSValNo) { +/// Merge all of the segments in RHS into this live range as the specified +/// value number. The segments in RHS are allowed to overlap with segments in +/// the current range, but only if the overlapping segments have the +/// specified value number. +void LiveRange::MergeSegmentsInAsValue(const LiveRange &RHS, + VNInfo *LHSValNo) { LiveRangeUpdater Updater(this); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) Updater.add(I->start, I->end, LHSValNo); } -/// MergeValueInAsValue - Merge all of the live ranges of a specific val# -/// in RHS into this live interval as the specified value number. -/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the -/// current interval, it will replace the value numbers of the overlaped -/// live ranges with the specified value number. -void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, - const VNInfo *RHSValNo, - VNInfo *LHSValNo) { +/// MergeValueInAsValue - Merge all of the live segments of a specific val# +/// in RHS into this live range as the specified value number. +/// The segments in RHS are allowed to overlap with segments in the +/// current range, it will replace the value numbers of the overlaped +/// segments with the specified value number. +void LiveRange::MergeValueInAsValue(const LiveRange &RHS, + const VNInfo *RHSValNo, + VNInfo *LHSValNo) { LiveRangeUpdater Updater(this); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) if (I->valno == RHSValNo) @@ -520,9 +513,9 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, /// MergeValueNumberInto - This method is called when two value nubmers /// are found to be equivalent. This eliminates V1, replacing all -/// LiveRanges with the V1 value number with the V2 value number. This can +/// segments with the V1 value number with the V2 value number. This can /// cause merging of V1/V2 values numbers and compaction of the value space. -VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { +VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { assert(V1 != V2 && "Identical value#'s are always equivalent!"); // This code actually merges the (numerically) larger value number into the @@ -536,37 +529,37 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { std::swap(V1, V2); } - // Merge V1 live ranges into V2. + // Merge V1 segments into V2. for (iterator I = begin(); I != end(); ) { - iterator LR = I++; - if (LR->valno != V1) continue; // Not a V1 LiveRange. + iterator S = I++; + if (S->valno != V1) continue; // Not a V1 Segment. // Okay, we found a V1 live range. If it had a previous, touching, V2 live // range, extend it. - if (LR != begin()) { - iterator Prev = LR-1; - if (Prev->valno == V2 && Prev->end == LR->start) { - Prev->end = LR->end; + if (S != begin()) { + iterator Prev = S-1; + if (Prev->valno == V2 && Prev->end == S->start) { + Prev->end = S->end; // Erase this live-range. - ranges.erase(LR); + segments.erase(S); I = Prev+1; - LR = Prev; + S = Prev; } } // Okay, now we have a V1 or V2 live range that is maximally merged forward. // Ensure that it is a V2 live-range. - LR->valno = V2; + S->valno = V2; - // If we can merge it into later V2 live ranges, do so now. We ignore any - // following V1 live ranges, as they will be merged in subsequent iterations + // If we can merge it into later V2 segments, do so now. We ignore any + // following V1 segments, as they will be merged in subsequent iterations // of the loop. if (I != end()) { - if (I->start == LR->end && I->valno == V2) { - LR->end = I->end; - ranges.erase(I); - I = LR+1; + if (I->start == S->end && I->valno == V2) { + S->end = I->end; + segments.erase(I); + I = S+1; } } } @@ -584,22 +577,21 @@ unsigned LiveInterval::getSize() const { return Sum; } -raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { - return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; +raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) { + return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ")"; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void LiveRange::dump() const { +void LiveRange::Segment::dump() const { dbgs() << *this << "\n"; } #endif -void LiveInterval::print(raw_ostream &OS) const { +void LiveRange::print(raw_ostream &OS) const { if (empty()) OS << "EMPTY"; else { - for (LiveInterval::Ranges::const_iterator I = ranges.begin(), - E = ranges.end(); I != E; ++I) { + for (const_iterator I = begin(), E = end(); I != E; ++I) { OS << *I; assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo"); } @@ -625,19 +617,29 @@ void LiveInterval::print(raw_ostream &OS) const { } } +void LiveInterval::print(raw_ostream &OS) const { + OS << PrintReg(reg) << ' '; + super::print(OS); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void LiveRange::dump() const { + dbgs() << *this << "\n"; +} + void LiveInterval::dump() const { dbgs() << *this << "\n"; } #endif #ifndef NDEBUG -void LiveInterval::verify() const { +void LiveRange::verify() const { for (const_iterator I = begin(), E = end(); I != E; ++I) { assert(I->start.isValid()); assert(I->end.isValid()); assert(I->start < I->end); assert(I->valno != 0); + assert(I->valno->id < valnos.size()); assert(I->valno == valnos[I->valno->id]); if (llvm::next(I) != E) { assert(I->end <= llvm::next(I)->start); @@ -649,10 +651,6 @@ void LiveInterval::verify() const { #endif -void LiveRange::print(raw_ostream &os) const { - os << *this; -} - //===----------------------------------------------------------------------===// // LiveRangeUpdater class //===----------------------------------------------------------------------===// @@ -665,11 +663,11 @@ void LiveRange::print(raw_ostream &os) const { // // Otherwise, segments are kept in three separate areas: // -// 1. [begin; WriteI) at the front of LI. -// 2. [ReadI; end) at the back of LI. +// 1. [begin; WriteI) at the front of LR. +// 2. [ReadI; end) at the back of LR. // 3. Spills. // -// - LI.begin() <= WriteI <= ReadI <= LI.end(). +// - LR.begin() <= WriteI <= ReadI <= LR.end(). // - Segments in all three areas are fully ordered and coalesced. // - Segments in area 1 precede and can't coalesce with segments in area 2. // - Segments in Spills precede and can't coalesce with segments in area 2. @@ -684,23 +682,23 @@ void LiveRange::print(raw_ostream &os) const { void LiveRangeUpdater::print(raw_ostream &OS) const { if (!isDirty()) { - if (LI) - OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n'; + if (LR) + OS << "Clean updater: " << *LR << '\n'; else OS << "Null updater.\n"; return; } - assert(LI && "Can't have null LI in dirty updater."); - OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI) + assert(LR && "Can't have null LR in dirty updater."); + OS << " updater with gap = " << (ReadI - WriteI) << ", last start = " << LastStart << ":\n Area 1:"; - for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I) + for (LiveRange::const_iterator I = LR->begin(); I != WriteI; ++I) OS << ' ' << *I; OS << "\n Spills:"; for (unsigned I = 0, E = Spills.size(); I != E; ++I) OS << ' ' << Spills[I]; OS << "\n Area 2:"; - for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I) + for (LiveRange::const_iterator I = ReadI, E = LR->end(); I != E; ++I) OS << ' ' << *I; OS << '\n'; } @@ -711,8 +709,9 @@ void LiveRangeUpdater::dump() const } // Determine if A and B should be coalesced. -static inline bool coalescable(const LiveRange &A, const LiveRange &B) { - assert(A.start <= B.start && "Unordered live ranges."); +static inline bool coalescable(const LiveRange::Segment &A, + const LiveRange::Segment &B) { + assert(A.start <= B.start && "Unordered live segments."); if (A.end == B.start) return A.valno == B.valno; if (A.end < B.start) @@ -721,8 +720,8 @@ static inline bool coalescable(const LiveRange &A, const LiveRange &B) { return true; } -void LiveRangeUpdater::add(LiveRange Seg) { - assert(LI && "Cannot add to a null destination"); +void LiveRangeUpdater::add(LiveRange::Segment Seg) { + assert(LR && "Cannot add to a null destination"); // Flush the state if Start moves backwards. if (!LastStart.isValid() || LastStart > Seg.start) { @@ -730,21 +729,21 @@ void LiveRangeUpdater::add(LiveRange Seg) { flush(); // This brings us to an uninitialized state. Reinitialize. assert(Spills.empty() && "Leftover spilled segments"); - WriteI = ReadI = LI->begin(); + WriteI = ReadI = LR->begin(); } // Remember start for next time. LastStart = Seg.start; // Advance ReadI until it ends after Seg.start. - LiveInterval::iterator E = LI->end(); + LiveRange::iterator E = LR->end(); if (ReadI != E && ReadI->end <= Seg.start) { // First try to close the gap between WriteI and ReadI with spills. if (ReadI != WriteI) mergeSpills(); // Then advance ReadI. if (ReadI == WriteI) - ReadI = WriteI = LI->find(Seg.start); + ReadI = WriteI = LR->find(Seg.start); else while (ReadI != E && ReadI->end <= Seg.start) *WriteI++ = *ReadI++; @@ -777,7 +776,7 @@ void LiveRangeUpdater::add(LiveRange Seg) { } // Try coalescing Seg into WriteI[-1]. - if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) { + if (WriteI != LR->begin() && coalescable(WriteI[-1], Seg)) { WriteI[-1].end = std::max(WriteI[-1].end, Seg.end); return; } @@ -788,10 +787,10 @@ void LiveRangeUpdater::add(LiveRange Seg) { return; } - // Finally, append to LI or Spills. + // Finally, append to LR or Spills. if (WriteI == E) { - LI->ranges.push_back(Seg); - WriteI = ReadI = LI->ranges.end(); + LR->segments.push_back(Seg); + WriteI = ReadI = LR->end(); } else Spills.push_back(Seg); } @@ -802,10 +801,10 @@ void LiveRangeUpdater::mergeSpills() { // Perform a backwards merge of Spills and [SpillI;WriteI). size_t GapSize = ReadI - WriteI; size_t NumMoved = std::min(Spills.size(), GapSize); - LiveInterval::iterator Src = WriteI; - LiveInterval::iterator Dst = Src + NumMoved; - LiveInterval::iterator SpillSrc = Spills.end(); - LiveInterval::iterator B = LI->begin(); + LiveRange::iterator Src = WriteI; + LiveRange::iterator Dst = Src + NumMoved; + LiveRange::iterator SpillSrc = Spills.end(); + LiveRange::iterator B = LR->begin(); // This is the new WriteI position after merging spills. WriteI = Dst; @@ -827,12 +826,12 @@ void LiveRangeUpdater::flush() { // Clear the dirty state. LastStart = SlotIndex(); - assert(LI && "Cannot add to a null destination"); + assert(LR && "Cannot add to a null destination"); // Nothing to merge? if (Spills.empty()) { - LI->ranges.erase(WriteI, ReadI); - LI->verify(); + LR->segments.erase(WriteI, ReadI); + LR->verify(); return; } @@ -840,17 +839,17 @@ void LiveRangeUpdater::flush() { size_t GapSize = ReadI - WriteI; if (GapSize < Spills.size()) { // The gap is too small. Make some room. - size_t WritePos = WriteI - LI->begin(); - LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange()); + size_t WritePos = WriteI - LR->begin(); + LR->segments.insert(ReadI, Spills.size() - GapSize, LiveRange::Segment()); // This also invalidated ReadI, but it is recomputed below. - WriteI = LI->ranges.begin() + WritePos; + WriteI = LR->begin() + WritePos; } else { // Shrink the gap if necessary. - LI->ranges.erase(WriteI + Spills.size(), ReadI); + LR->segments.erase(WriteI + Spills.size(), ReadI); } ReadI = WriteI + Spills.size(); mergeSpills(); - LI->verify(); + LR->verify(); } unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { @@ -909,8 +908,16 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); ++RI; - // DBG_VALUE instructions should have been eliminated earlier. - LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI)); + // DBG_VALUE instructions don't have slot indexes, so get the index of the + // instruction before them. + // Normally, DBG_VALUE instructions are removed before this function is + // called, but it is not a requirement. + SlotIndex Idx; + if (MI->isDebugValue()) + Idx = LIS.getSlotIndexes()->getIndexBefore(MI); + else + Idx = LIS.getInstructionIndex(MI); + LiveQueryResult LRQ = LI.Query(Idx); const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); // In the case of an <undef> use that isn't tied to any def, VNI will be // NULL. If the use is tied to a def, VNI will be the defined value. @@ -927,11 +934,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], if (unsigned eq = EqClass[I->valno->id]) { assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && "New intervals should be empty"); - LIV[eq]->ranges.push_back(*I); + LIV[eq]->segments.push_back(*I); } else *J++ = *I; } - LI.ranges.erase(J, E); + LI.segments.erase(J, E); // Transfer VNInfos to their new owners and renumber them. unsigned j = 0, e = LI.getNumValNums(); diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index f1b8394..e1c3217 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Value.h" +#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -51,6 +52,14 @@ INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) +#ifndef NDEBUG +static cl::opt<bool> EnablePrecomputePhysRegs( + "precompute-phys-liveness", cl::Hidden, + cl::desc("Eagerly compute live intervals for all physreg units.")); +#else +static bool EnablePrecomputePhysRegs = false; +#endif // NDEBUG + void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); @@ -86,15 +95,15 @@ void LiveIntervals::releaseMemory() { RegMaskBits.clear(); RegMaskBlocks.clear(); - for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i) - delete RegUnitIntervals[i]; - RegUnitIntervals.clear(); + for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i) + delete RegUnitRanges[i]; + RegUnitRanges.clear(); // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. VNInfoAllocator.Reset(); } -/// runOnMachineFunction - Register allocate the whole function +/// runOnMachineFunction - calculates LiveIntervals /// bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { MF = &fn; @@ -115,6 +124,12 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { computeRegMasks(); computeLiveInRegUnits(); + if (EnablePrecomputePhysRegs) { + // For stress testing, precompute live ranges of all physical register + // units, including reserved registers. + for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + getRegUnit(i); + } DEBUG(dump()); return true; } @@ -124,15 +139,15 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << "********** INTERVALS **********\n"; // Dump the regunits. - for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i) - if (LiveInterval *LI = RegUnitIntervals[i]) - OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n'; + for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i) + if (LiveRange *LR = RegUnitRanges[i]) + OS << PrintRegUnit(i, TRI) << ' ' << *LR << '\n'; // Dump the virtregs. for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (hasInterval(Reg)) - OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n'; + OS << getInterval(Reg) << '\n'; } OS << "RegMasks:"; @@ -155,16 +170,17 @@ void LiveIntervals::dumpInstrs() const { #endif LiveInterval* LiveIntervals::createInterval(unsigned reg) { - float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F; + float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? + llvm::huge_valf : 0.0F; return new LiveInterval(reg, Weight); } /// computeVirtRegInterval - Compute the live interval of a virtual register, /// based on defs and uses. -void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) { +void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); - assert(LI->empty() && "Should only compute empty intervals."); + assert(LI.empty() && "Should only compute empty intervals."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); LRCalc->createDeadDefs(LI); LRCalc->extendToUses(LI); @@ -175,9 +191,7 @@ void LiveIntervals::computeVirtRegs() { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; - LiveInterval *LI = createInterval(Reg); - VirtRegIntervals[Reg] = LI; - computeVirtRegInterval(LI); + createAndComputeVirtRegInterval(Reg); } } @@ -214,12 +228,10 @@ void LiveIntervals::computeRegMasks() { // interference. // -/// computeRegUnitInterval - Compute the live interval of a register unit, based -/// on the uses and defs of aliasing registers. The interval should be empty, +/// computeRegUnitInterval - Compute the live range of a register unit, based +/// on the uses and defs of aliasing registers. The range should be empty, /// or contain only dead phi-defs from ABI blocks. -void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { - unsigned Unit = LI->reg; - +void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) { assert(LRCalc && "LRCalc not initialized."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); @@ -229,25 +241,21 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { // idempotent. It is very rare for a register unit to have multiple roots, so // uniquing super-registers is probably not worthwhile. for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { - unsigned Root = *Roots; - if (!MRI->reg_empty(Root)) - LRCalc->createDeadDefs(LI, Root); - for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { + for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true); + Supers.isValid(); ++Supers) { if (!MRI->reg_empty(*Supers)) - LRCalc->createDeadDefs(LI, *Supers); + LRCalc->createDeadDefs(LR, *Supers); } } - // Now extend LI to reach all uses. + // Now extend LR to reach all uses. // Ignore uses of reserved registers. We only track defs of those. for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { - unsigned Root = *Roots; - if (!MRI->isReserved(Root) && !MRI->reg_empty(Root)) - LRCalc->extendToUses(LI, Root); - for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { + for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true); + Supers.isValid(); ++Supers) { unsigned Reg = *Supers; if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg)) - LRCalc->extendToUses(LI, Reg); + LRCalc->extendToUses(LR, Reg); } } } @@ -258,11 +266,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { /// without a corresponding def when entering the entry block or a landing pad. /// void LiveIntervals::computeLiveInRegUnits() { - RegUnitIntervals.resize(TRI->getNumRegUnits()); + RegUnitRanges.resize(TRI->getNumRegUnits()); DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n"); - // Keep track of the intervals allocated. - SmallVector<LiveInterval*, 8> NewIntvs; + // Keep track of the live range sets allocated. + SmallVector<unsigned, 8> NewRanges; // Check all basic blocks for live-ins. for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); @@ -280,23 +288,25 @@ void LiveIntervals::computeLiveInRegUnits() { LIE = MBB->livein_end(); LII != LIE; ++LII) { for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; - LiveInterval *Intv = RegUnitIntervals[Unit]; - if (!Intv) { - Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF); - NewIntvs.push_back(Intv); + LiveRange *LR = RegUnitRanges[Unit]; + if (!LR) { + LR = RegUnitRanges[Unit] = new LiveRange(); + NewRanges.push_back(Unit); } - VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator()); + VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator()); (void)VNI; DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id); } } DEBUG(dbgs() << '\n'); } - DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n"); + DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n"); - // Compute the 'normal' part of the intervals. - for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i) - computeRegUnitInterval(NewIntvs[i]); + // Compute the 'normal' part of the ranges. + for (unsigned i = 0, e = NewRanges.size(); i != e; ++i) { + unsigned Unit = NewRanges[i]; + computeRegUnitRange(*RegUnitRanges[Unit], Unit); + } } @@ -320,7 +330,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); - LiveRangeQuery LRQ(*li, Idx); + LiveQueryResult LRQ = li->Query(Idx); VNInfo *VNI = LRQ.valueIn(); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is @@ -339,14 +349,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, WorkList.push_back(std::make_pair(Idx, VNI)); } - // Create a new live interval with only minimal live segments per def. - LiveInterval NewLI(li->reg, 0); + // Create new live ranges with only minimal live segments per def. + LiveRange NewLR; for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI)); + NewLR.addSegment(LiveRange::Segment(VNI->def, VNI->def.getDeadSlot(), VNI)); } // Keep track of the PHIs that are in use. @@ -361,7 +371,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SlotIndex BlockStart = getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. - if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { + if (VNInfo *ExtVNI = NewLR.extendInBlock(BlockStart, Idx)) { (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? @@ -382,7 +392,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // VNI is live-in to MBB. DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); - NewLI.addRange(LiveRange(BlockStart, Idx, VNI)); + NewLR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI)); // Make sure VNI is live-out from the predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), @@ -403,14 +413,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, VNInfo *VNI = *I; if (VNI->isUnused()) continue; - LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); - assert(LII != NewLI.end() && "Missing live range for PHI"); - if (LII->end != VNI->def.getDeadSlot()) + LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def); + assert(LRI != NewLR.end() && "Missing segment for PHI"); + if (LRI->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. VNI->markUnused(); - NewLI.removeRange(*LII); + NewLR.removeSegment(LRI->start, LRI->end); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); CanSeparate = true; } else { @@ -425,23 +435,23 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, } } - // Move the trimmed ranges back. - li->ranges.swap(NewLI.ranges); + // Move the trimmed segments back. + li->segments.swap(NewLR.segments); DEBUG(dbgs() << "Shrunk: " << *li << '\n'); return CanSeparate; } -void LiveIntervals::extendToIndices(LiveInterval *LI, +void LiveIntervals::extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices) { assert(LRCalc && "LRCalc not initialized."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); for (unsigned i = 0, e = Indices.size(); i != e; ++i) - LRCalc->extend(LI, Indices[i]); + LRCalc->extend(LR, Indices[i]); } void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, SmallVectorImpl<SlotIndex> *EndPoints) { - LiveRangeQuery LRQ(*LI, Kill); + LiveQueryResult LRQ = LI->Query(Kill); VNInfo *VNI = LRQ.valueOut(); if (!VNI) return; @@ -452,13 +462,13 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, // If VNI isn't live out from KillMBB, the value is trivially pruned. if (LRQ.endPoint() < MBBEnd) { - LI->removeRange(Kill, LRQ.endPoint()); + LI->removeSegment(Kill, LRQ.endPoint()); if (EndPoints) EndPoints->push_back(LRQ.endPoint()); return; } // VNI is live out of KillMBB. - LI->removeRange(Kill, MBBEnd); + LI->removeSegment(Kill, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); // Find all blocks that are reachable from KillMBB without leaving VNI's live @@ -476,23 +486,23 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, // Check if VNI is live in to MBB. tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); - LiveRangeQuery LRQ(*LI, MBBStart); + LiveQueryResult LRQ = LI->Query(MBBStart); if (LRQ.valueIn() != VNI) { - // This block isn't part of the VNI live range. Prune the search. + // This block isn't part of the VNI segment. Prune the search. I.skipChildren(); continue; } // Prune the search if VNI is killed in MBB. if (LRQ.endPoint() < MBBEnd) { - LI->removeRange(MBBStart, LRQ.endPoint()); + LI->removeSegment(MBBStart, LRQ.endPoint()); if (EndPoints) EndPoints->push_back(LRQ.endPoint()); I.skipChildren(); continue; } // VNI is live through MBB. - LI->removeRange(MBBStart, MBBEnd); + LI->removeSegment(MBBStart, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); ++I; } @@ -505,7 +515,7 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // Keep track of regunit ranges. - SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU; + SmallVector<std::pair<LiveRange*, LiveRange::iterator>, 8> RU; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); @@ -520,13 +530,14 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { RU.clear(); for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid(); ++Units) { - LiveInterval *RUInt = &getRegUnit(*Units); - if (RUInt->empty()) + LiveRange &RURanges = getRegUnit(*Units); + if (RURanges.empty()) continue; - RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end))); + RU.push_back(std::make_pair(&RURanges, RURanges.find(LI->begin()->end))); } - // Every instruction that kills Reg corresponds to a live range end point. + // Every instruction that kills Reg corresponds to a segment range end + // point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; ++RI) { // A block index indicates an MBB edge. @@ -536,7 +547,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { if (!MI) continue; - // Check if any of the reguints are live beyond the end of RI. That could + // Check if any of the regunits are live beyond the end of RI. That could // happen when a physreg is defined as a copy of a virtreg: // // %EAX = COPY %vreg5 @@ -546,12 +557,12 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. bool CancelKill = false; for (unsigned u = 0, e = RU.size(); u != e; ++u) { - LiveInterval *RInt = RU[u].first; - LiveInterval::iterator &I = RU[u].second; - if (I == RInt->end()) + LiveRange &RRanges = *RU[u].first; + LiveRange::iterator &I = RU[u].second; + if (I == RRanges.end()) continue; - I = RInt->advanceTo(I, RI->end); - if (I == RInt->end() || I->start >= RI->end) + I = RRanges.advanceTo(I, RI->end); + if (I == RRanges.end() || I->start >= RI->end) continue; // I is overlapping RI. CancelKill = true; @@ -609,35 +620,23 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { } float -LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { - // Limit the loop depth ridiculousness. - if (loopDepth > 200) - loopDepth = 200; - - // The loop depth is used to roughly estimate the number of times the - // instruction is executed. Something like 10^d is simple, but will quickly - // overflow a float. This expression behaves like 10^d for small d, but is - // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of - // headroom before overflow. - // By the way, powf() might be unavailable here. For consistency, - // We may take pow(double,double). - float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); - - return (isDef + isUse) * lc; +LiveIntervals::getSpillWeight(bool isDef, bool isUse, BlockFrequency freq) { + const float Scale = 1.0f / BlockFrequency::getEntryFrequency(); + return (isDef + isUse) * (freq.getFrequency() * Scale); } -LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, - MachineInstr* startInst) { - LiveInterval& Interval = getOrCreateInterval(reg); +LiveRange::Segment +LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr* startInst) { + LiveInterval& Interval = createEmptyInterval(reg); VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getVNInfoAllocator()); - LiveRange LR( + LiveRange::Segment S( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); - Interval.addRange(LR); + Interval.addSegment(S); - return LR; + return S; } @@ -712,7 +711,7 @@ private: const TargetRegisterInfo& TRI; SlotIndex OldIdx; SlotIndex NewIdx; - SmallPtrSet<LiveInterval*, 8> Updated; + SmallPtrSet<LiveRange*, 8> Updated; bool UpdateFlags; public: @@ -726,7 +725,7 @@ public: // physregs, even those that aren't needed for regalloc, in order to update // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill // flags, and postRA passes will use a live register utility instead. - LiveInterval *getRegUnitLI(unsigned Unit) { + LiveRange *getRegUnitLI(unsigned Unit) { if (UpdateFlags) return &LIS.getRegUnit(Unit); return LIS.getCachedRegUnit(Unit); @@ -751,15 +750,16 @@ public: if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) { - updateRange(LIS.getInterval(Reg)); + LiveInterval &LI = LIS.getInterval(Reg); + updateRange(LI, Reg); continue; } // For physregs, only update the regunits that actually have a // precomputed live range. for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) - if (LiveInterval *LI = getRegUnitLI(*Units)) - updateRange(*LI); + if (LiveRange *LR = getRegUnitLI(*Units)) + updateRange(*LR, *Units); } if (hasRegMask) updateRegMaskSlots(); @@ -768,26 +768,26 @@ public: private: /// Update a single live range, assuming an instruction has been moved from /// OldIdx to NewIdx. - void updateRange(LiveInterval &LI) { - if (!Updated.insert(&LI)) + void updateRange(LiveRange &LR, unsigned Reg) { + if (!Updated.insert(&LR)) return; DEBUG({ dbgs() << " "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - dbgs() << PrintReg(LI.reg); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + dbgs() << PrintReg(Reg); else - dbgs() << PrintRegUnit(LI.reg, &TRI); - dbgs() << ":\t" << LI << '\n'; + dbgs() << PrintRegUnit(Reg, &TRI); + dbgs() << ":\t" << LR << '\n'; }); if (SlotIndex::isEarlierInstr(OldIdx, NewIdx)) - handleMoveDown(LI); + handleMoveDown(LR); else - handleMoveUp(LI); - DEBUG(dbgs() << " -->\t" << LI << '\n'); - LI.verify(); + handleMoveUp(LR, Reg); + DEBUG(dbgs() << " -->\t" << LR << '\n'); + LR.verify(); } - /// Update LI to reflect an instruction has been moved downwards from OldIdx + /// Update LR to reflect an instruction has been moved downwards from OldIdx /// to NewIdx. /// /// 1. Live def at OldIdx: @@ -801,17 +801,17 @@ private: /// Move def to NewIdx, possibly across another live value. /// /// 4. Def at OldIdx AND at NewIdx: - /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx. + /// Remove segment [OldIdx;NewIdx) and value defined at OldIdx. /// (Happens when bundling multiple defs together). /// /// 5. Value read at OldIdx, killed before NewIdx: /// Extend kill to NewIdx. /// - void handleMoveDown(LiveInterval &LI) { + void handleMoveDown(LiveRange &LR) { // First look for a kill at OldIdx. - LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); - LiveInterval::iterator E = LI.end(); - // Is LI even live at OldIdx? + LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); + LiveRange::iterator E = LR.end(); + // Is LR even live at OldIdx? if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) return; @@ -828,7 +828,7 @@ private: for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO) if (MO->isReg() && MO->isUse()) MO->setIsKill(false); - // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by + // Adjust I->end to reach NewIdx. This may temporarily make LR invalid by // overlapping ranges. Case 5 above. I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); // If this was a kill, there may also be a def. Otherwise we're done. @@ -857,24 +857,25 @@ private: assert((I->end == OldIdx.getDeadSlot() || SlotIndex::isSameInstr(I->end, NewIdx)) && "Cannot move def below kill"); - LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot()); + LiveRange::iterator NewI = LR.advanceTo(I, NewIdx.getRegSlot()); if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) { // There is an existing def at NewIdx, case 4 above. The def at OldIdx is // coalesced into that value. assert(NewI->valno != DefVNI && "Multiple defs of value?"); - LI.removeValNo(DefVNI); + LR.removeValNo(DefVNI); return; } // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx. - // If the def at OldIdx was dead, we allow it to be moved across other LI + // If the def at OldIdx was dead, we allow it to be moved across other LR // values. The new range should be placed immediately before NewI, move any // intermediate ranges up. assert(NewI != I && "Inconsistent iterators"); std::copy(llvm::next(I), NewI, I); - *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + *llvm::prior(NewI) + = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } - /// Update LI to reflect an instruction has been moved upwards from OldIdx + /// Update LR to reflect an instruction has been moved upwards from OldIdx /// to NewIdx. /// /// 1. Live def at OldIdx: @@ -894,11 +895,11 @@ private: /// Hoist kill to NewIdx, then scan for last kill between NewIdx and /// OldIdx. /// - void handleMoveUp(LiveInterval &LI) { + void handleMoveUp(LiveRange &LR, unsigned Reg) { // First look for a kill at OldIdx. - LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); - LiveInterval::iterator E = LI.end(); - // Is LI even live at OldIdx? + LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); + LiveRange::iterator E = LR.end(); + // Is LR even live at OldIdx? if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) return; @@ -915,7 +916,7 @@ private: if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { // No def, search for the new kill. // This can never be an early clobber kill since there is no def. - llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot(); + llvm::prior(I)->end = findLastUseBefore(Reg).getRegSlot(); return; } } @@ -927,18 +928,18 @@ private: DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); // Check for an existing def at NewIdx. - LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot()); + LiveRange::iterator NewI = LR.find(NewIdx.getRegSlot()); if (SlotIndex::isSameInstr(NewI->start, NewIdx)) { assert(NewI->valno != DefVNI && "Same value defined more than once?"); // There is an existing def at NewIdx. if (I->end.isDead()) { // Case 3: Remove the dead def at OldIdx. - LI.removeValNo(DefVNI); + LR.removeValNo(DefVNI); return; } // Case 4: Replace def at NewIdx with live def at OldIdx. I->start = DefVNI->def; - LI.removeValNo(NewI->valno); + LR.removeValNo(NewI->valno); return; } @@ -949,10 +950,10 @@ private: return; } - // DefVNI is a dead def. It may have been moved across other values in LI, + // DefVNI is a dead def. It may have been moved across other values in LR, // so move I up to NewI. Slide [NewI;I) down one position. std::copy_backward(NewI, I, llvm::next(I)); - *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } void updateRegMaskSlots() { @@ -1075,8 +1076,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (MOI->isReg() && TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && !hasInterval(MOI->getReg())) { - LiveInterval &LI = getOrCreateInterval(MOI->getReg()); - computeVirtRegInterval(&LI); + createAndComputeVirtRegInterval(MOI->getReg()); } } } @@ -1123,9 +1123,9 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (LII != LI.begin()) prevStart = llvm::prior(LII)->start; - // FIXME: This could be more efficient if there was a removeRange - // method that returned an iterator. - LI.removeRange(*LII, true); + // FIXME: This could be more efficient if there was a + // removeSegment method that returned an iterator. + LI.removeSegment(*LII, true); if (prevStart.isValid()) LII = LI.find(prevStart); else @@ -1144,13 +1144,14 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (!lastUseIdx.isValid()) { VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); - LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI); - LII = LI.addRange(LR); + LiveRange::Segment S(instrIdx.getRegSlot(), + instrIdx.getDeadSlot(), VNI); + LII = LI.addSegment(S); } else if (LII->start != instrIdx.getRegSlot()) { VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); - LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI); - LII = LI.addRange(LR); + LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI); + LII = LI.addSegment(S); } if (MO.getSubReg() && !MO.isUndef()) diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp index dede490..ae086bc 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -36,11 +36,11 @@ void LiveRangeCalc::reset(const MachineFunction *mf, } -void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { +void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { assert(MRI && Indexes && "call reset() first"); // Visit all def operands. If the same instruction has multiple defs of Reg, - // LI->createDeadDef() will deduplicate. + // LR.createDeadDef() will deduplicate. for (MachineRegisterInfo::def_iterator I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) { const MachineInstr *MI = &*I; @@ -54,13 +54,13 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { Idx = Indexes->getInstructionIndex(MI) .getRegSlot(I.getOperand().isEarlyClobber()); - // Create the def in LI. This may find an existing def. - LI->createDeadDef(Idx, *Alloc); + // Create the def in LR. This may find an existing def. + LR.createDeadDef(Idx, *Alloc); } } -void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { +void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { assert(MRI && Indexes && "call reset() first"); // Visit all operands that read Reg. This may include partial defs. @@ -99,7 +99,7 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { Idx = Idx.getRegSlot(true); } } - extend(LI, Idx, Reg); + extend(LR, Idx, Reg); } } @@ -125,17 +125,14 @@ void LiveRangeCalc::updateLiveIns() { assert(Seen.test(MBB->getNumber())); LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0); } - Updater.setDest(I->LI); + Updater.setDest(&I->LR); Updater.add(Start, End, I->Value); } LiveIn.clear(); } -void LiveRangeCalc::extend(LiveInterval *LI, - SlotIndex Kill, - unsigned PhysReg) { - assert(LI && "Missing live range"); +void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg) { assert(Kill.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); @@ -144,14 +141,14 @@ void LiveRangeCalc::extend(LiveInterval *LI, assert(KillMBB && "No MBB at Kill"); // Is there a def in the same MBB we can extend? - if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) + if (LR.extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) return; // Find the single reaching def, or determine if Kill is jointly dominated by // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - if (findReachingDefs(LI, KillMBB, Kill, PhysReg)) + if (findReachingDefs(LR, *KillMBB, Kill, PhysReg)) return; // When there were multiple different values, we may need new PHIs. @@ -170,13 +167,11 @@ void LiveRangeCalc::calculateValues() { } -bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg) { - unsigned KillMBBNum = KillMBB->getNumber(); +bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, + SlotIndex Kill, unsigned PhysReg) { + unsigned KillMBBNum = KillMBB.getNumber(); - // Block numbers where LI should be live-in. + // Block numbers where LR should be live-in. SmallVector<unsigned, 16> WorkList(1, KillMBBNum); // Remember if we have seen more than one value. @@ -203,7 +198,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, #endif for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { + PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock *Pred = *PI; // Is this a known live-out block? @@ -221,7 +216,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, // First time we see Pred. Try to determine the live-out value, but set // it as null if Pred is live-through with an unknown value. - VNInfo *VNI = LI->extendInBlock(Start, End); + VNInfo *VNI = LR.extendInBlock(Start, End); setLiveOutValue(Pred, VNI); if (VNI) { if (TheVNI && TheVNI != VNI) @@ -231,7 +226,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, } // No, we need a live-in value for Pred as well - if (Pred != KillMBB) + if (Pred != &KillMBB) WorkList.push_back(Pred->getNumber()); else // Loopback to KillMBB, so value is really live through. @@ -248,9 +243,9 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, // If a unique reaching def was found, blit in the live ranges immediately. if (UniqueVNI) { - LiveRangeUpdater Updater(LI); - for (SmallVectorImpl<unsigned>::const_iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { + LiveRangeUpdater Updater(&LR); + for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(), + E = WorkList.end(); I != E; ++I) { SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(*I); // Trim the live range in KillMBB. @@ -270,8 +265,8 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { MachineBasicBlock *MBB = MF->getBlockNumbered(*I); - addLiveInBlock(LI, DomTree->getNode(MBB)); - if (MBB == KillMBB) + addLiveInBlock(LR, DomTree->getNode(MBB)); + if (MBB == &KillMBB) LiveIn.back().Kill = Kill; } @@ -348,16 +343,17 @@ void LiveRangeCalc::updateSSA() { assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); - VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); + LiveRange &LR = I->LR; + VNInfo *VNI = LR.getNextValue(Start, *Alloc); I->Value = VNI; // This block is done, we know the final value. I->DomNode = 0; // Add liveness since updateLiveIns now skips this node. if (I->Kill.isValid()) - I->LI->addRange(LiveRange(Start, I->Kill, VNI)); + LR.addSegment(LiveInterval::Segment(Start, I->Kill, VNI)); else { - I->LI->addRange(LiveRange(Start, End, VNI)); + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h index 57cab7b..a3a3fbb 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -75,9 +75,9 @@ class LiveRangeCalc { /// LiveInBlock - Information about a basic block where a live range is known /// to be live-in, but the value has not yet been determined. struct LiveInBlock { - // LI - The live range that is live-in to this block. The algorithms can + // The live range set that is live-in to this block. The algorithms can // handle multiple non-overlapping live ranges simultaneously. - LiveInterval *LI; + LiveRange &LR; // DomNode - Dominator tree node for the block. // Cleared when the final value has been determined and LI has been updated. @@ -91,8 +91,8 @@ class LiveRangeCalc { // Live-in value filled in by updateSSA once it is known. VNInfo *Value; - LiveInBlock(LiveInterval *li, MachineDomTreeNode *node, SlotIndex kill) - : LI(li), DomNode(node), Kill(kill), Value(0) {} + LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) + : LR(LR), DomNode(node), Kill(kill), Value(0) {} }; /// LiveIn - Work list of blocks where the live-in value has yet to be @@ -111,10 +111,8 @@ class LiveRangeCalc { /// are added to the LiveIn array, and the function returns false. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - bool findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg); + bool findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, + SlotIndex Kill, unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. @@ -146,10 +144,6 @@ public: MachineDominatorTree*, VNInfo::Allocator*); - /// calculate - Calculate the live range of a virtual register from its defs - /// and uses. LI must be empty with no values. - void calculate(LiveInterval *LI); - //===--------------------------------------------------------------------===// // Mid-level interface. //===--------------------------------------------------------------------===// @@ -165,27 +159,27 @@ public: /// single existing value, Alloc may be null. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0); + void extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg = 0); /// createDeadDefs - Create a dead def in LI for every def operand of Reg. /// Each instruction defining Reg gets a new VNInfo with a corresponding /// minimal live range. - void createDeadDefs(LiveInterval *LI, unsigned Reg); + void createDeadDefs(LiveRange &LR, unsigned Reg); /// createDeadDefs - Create a dead def in LI for every def of LI->reg. - void createDeadDefs(LiveInterval *LI) { - createDeadDefs(LI, LI->reg); + void createDeadDefs(LiveInterval &LI) { + createDeadDefs(LI, LI.reg); } /// extendToUses - Extend the live range of LI to reach all uses of Reg. /// /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. - void extendToUses(LiveInterval *LI, unsigned Reg); + void extendToUses(LiveRange &LR, unsigned Reg); /// extendToUses - Extend the live range of LI to reach all uses of LI->reg. - void extendToUses(LiveInterval *LI) { - extendToUses(LI, LI->reg); + void extendToUses(LiveInterval &LI) { + extendToUses(LI, LI.reg); } //===--------------------------------------------------------------------===// @@ -216,15 +210,15 @@ public: /// function can only be called once per basic block. Once the live-in value /// has been determined, calculateValues() will add liveness to LI. /// - /// @param LI The live range that is live-in to the block. + /// @param LR The live range that is live-in to the block. /// @param DomNode The domtree node for the block. /// @param Kill Index in block where LI is killed. If the value is /// live-through, set Kill = SLotIndex() and also call /// setLiveOutValue(MBB, 0). - void addLiveInBlock(LiveInterval *LI, + void addLiveInBlock(LiveRange &LR, MachineDomTreeNode *DomNode, SlotIndex Kill = SlotIndex()) { - LiveIn.push_back(LiveInBlock(LI, DomNode, Kill)); + LiveIn.push_back(LiveInBlock(LR, DomNode, Kill)); } /// calculateValues - Calculate the value that will be live-in to each block diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index 7793e96..cb70c43 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -31,17 +30,23 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); void LiveRangeEdit::Delegate::anchor() { } -LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) { +LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) { unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); if (VRM) { - VRM->grow(); VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } - LiveInterval &LI = LIS.getOrCreateInterval(VReg); - NewRegs.push_back(&LI); + LiveInterval &LI = LIS.createEmptyInterval(VReg); return LI; } +unsigned LiveRangeEdit::createFrom(unsigned OldReg) { + unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + if (VRM) { + VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); + } + return VReg; +} + bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, AliasAnalysis *aa) { @@ -216,108 +221,122 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, return true; } -void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - ArrayRef<unsigned> RegsBeingSpilled) { - SetVector<LiveInterval*, - SmallVector<LiveInterval*, 8>, - SmallPtrSet<LiveInterval*, 8> > ToShrink; +/// Find all live intervals that need to shrink, then remove the instruction. +void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { + assert(MI->allDefsAreDead() && "Def isn't really dead"); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - for (;;) { - // Erase all dead defs. - while (!Dead.empty()) { - MachineInstr *MI = Dead.pop_back_val(); - assert(MI->allDefsAreDead() && "Def isn't really dead"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - - // Never delete inline asm. - if (MI->isInlineAsm()) { - DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); - continue; - } + // Never delete a bundled instruction. + if (MI->isBundled()) { + return; + } + // Never delete inline asm. + if (MI->isInlineAsm()) { + DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); + return; + } - // Use the same criteria as DeadMachineInstructionElim. - bool SawStore = false; - if (!MI->isSafeToMove(&TII, 0, SawStore)) { - DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); - continue; - } + // Use the same criteria as DeadMachineInstructionElim. + bool SawStore = false; + if (!MI->isSafeToMove(&TII, 0, SawStore)) { + DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); + return; + } - DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); - - // Collect virtual registers to be erased after MI is gone. - SmallVector<unsigned, 8> RegsToErase; - bool ReadsPhysRegs = false; - - // Check for live intervals that may shrink - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (!MOI->isReg()) - continue; - unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // Check if MI reads any unreserved physregs. - if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) - ReadsPhysRegs = true; - continue; - } - LiveInterval &LI = LIS.getInterval(Reg); - - // Shrink read registers, unless it is likely to be expensive and - // unlikely to change anything. We typically don't want to shrink the - // PIC base register that has lots of uses everywhere. - // Always shrink COPY uses that probably come from live range splitting. - if (MI->readsVirtualRegister(Reg) && - (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || - LI.killedAt(Idx))) - ToShrink.insert(&LI); - - // Remove defined value. - if (MOI->isDef()) { - if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { - if (TheDelegate) - TheDelegate->LRE_WillShrinkVirtReg(LI.reg); - LI.removeValNo(VNI); - if (LI.empty()) - RegsToErase.push_back(Reg); + DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + + // Collect virtual registers to be erased after MI is gone. + SmallVector<unsigned, 8> RegsToErase; + bool ReadsPhysRegs = false; + + // Check for live intervals that may shrink + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg()) + continue; + unsigned Reg = MOI->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) + ReadsPhysRegs = true; + else if (MOI->isDef()) { + for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo()); + Units.isValid(); ++Units) { + if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { + if (VNInfo *VNI = LR->getVNInfoAt(Idx)) + LR->removeValNo(VNI); } } } - - // Currently, we don't support DCE of physreg live ranges. If MI reads - // any unreserved physregs, don't erase the instruction, but turn it into - // a KILL instead. This way, the physreg live ranges don't end up - // dangling. - // FIXME: It would be better to have something like shrinkToUses() for - // physregs. That could potentially enable more DCE and it would free up - // the physreg. It would not happen often, though. - if (ReadsPhysRegs) { - MI->setDesc(TII.get(TargetOpcode::KILL)); - // Remove all operands that aren't physregs. - for (unsigned i = MI->getNumOperands(); i; --i) { - const MachineOperand &MO = MI->getOperand(i-1); - if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - MI->RemoveOperand(i-1); - } - DEBUG(dbgs() << "Converted physregs to:\t" << *MI); - } else { + continue; + } + LiveInterval &LI = LIS.getInterval(Reg); + + // Shrink read registers, unless it is likely to be expensive and + // unlikely to change anything. We typically don't want to shrink the + // PIC base register that has lots of uses everywhere. + // Always shrink COPY uses that probably come from live range splitting. + if (MI->readsVirtualRegister(Reg) && + (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || + LI.Query(Idx).isKill())) + ToShrink.insert(&LI); + + // Remove defined value. + if (MOI->isDef()) { + if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { if (TheDelegate) - TheDelegate->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + TheDelegate->LRE_WillShrinkVirtReg(LI.reg); + LI.removeValNo(VNI); + if (LI.empty()) + RegsToErase.push_back(Reg); } + } + } - // Erase any virtregs that are now empty and unused. There may be <undef> - // uses around. Keep the empty live range in that case. - for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { - unsigned Reg = RegsToErase[i]; - if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { - ToShrink.remove(&LIS.getInterval(Reg)); - eraseVirtReg(Reg); - } - } + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. + if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } + + // Erase any virtregs that are now empty and unused. There may be <undef> + // uses around. Keep the empty live range in that case. + for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { + unsigned Reg = RegsToErase[i]; + if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { + ToShrink.remove(&LIS.getInterval(Reg)); + eraseVirtReg(Reg); + } + } +} + +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, + ArrayRef<unsigned> RegsBeingSpilled) { + ToShrinkSet ToShrink; + + for (;;) { + // Erase all dead defs. + while (!Dead.empty()) + eliminateDeadDef(Dead.pop_back_val(), ToShrink); if (ToShrink.empty()) break; @@ -331,7 +350,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, TheDelegate->LRE_WillShrinkVirtReg(LI->reg); if (!LIS.shrinkToUses(LI, &Dead)) continue; - + // Don't create new intervals for a register being spilled. // The new intervals would have to be spilled anyway so its not worth it. // Also they currently aren't spilled so creating them and not spilling @@ -343,11 +362,11 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, break; } } - + if (BeingSpilled) continue; // LI may have been separated, create new intervals. - LI->RenumberValues(LIS); + LI->RenumberValues(); ConnectedVNInfoEqClasses ConEQ(LIS); unsigned NumComp = ConEQ.Classify(LI); if (NumComp <= 1) @@ -357,7 +376,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector<LiveInterval*, 8> Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createFrom(LI->reg)); + Dups.push_back(&createEmptyIntervalFrom(LI->reg)); // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. @@ -374,14 +393,27 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } } -void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, - const MachineLoopInfo &Loops) { - VirtRegAuxInfo VRAI(MF, LIS, Loops); - for (iterator I = begin(), E = end(); I != E; ++I) { - LiveInterval &LI = **I; +// Keep track of new virtual registers created via +// MachineRegisterInfo::createVirtualRegister. +void +LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg) +{ + if (VRM) + VRM->grow(); + + NewRegs.push_back(VReg); +} + +void +LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, + const MachineLoopInfo &Loops, + const MachineBlockFrequencyInfo &MBFI) { + VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI); + for (unsigned I = 0, Size = size(); I < Size; ++I) { + LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to " << MRI.getRegClass(LI.reg)->getName() << '\n'); - VRAI.CalculateWeightAndHint(LI); + VRAI.calculateSpillWeightAndHint(LI); } } diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp index 0ef069f..1d801ac 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -119,9 +119,11 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, if (VirtReg.empty()) return false; CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) - if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes())) + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + const LiveRange &UnitRange = LIS->getRegUnit(*Units); + if (VirtReg.overlaps(UnitRange, CP, *LIS->getSlotIndexes())) return true; + } return false; } diff --git a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp new file mode 100644 index 0000000..6221ca2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -0,0 +1,111 @@ +//===-- LiveInterval.cpp - Live Interval Representation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveRegUnits utility for tracking liveness of +// physical register units across machine instructions in forward or backward +// order. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +using namespace llvm; + +/// Return true if the given MachineOperand clobbers the given register unit. +/// A register unit is only clobbered if all its super-registers are clobbered. +static bool operClobbersUnit(const MachineOperand *MO, unsigned Unit, + const MCRegisterInfo *MCRI) { + for (MCRegUnitRootIterator RI(Unit, MCRI); RI.isValid(); ++RI) { + for (MCSuperRegIterator SI(*RI, MCRI, true); SI.isValid(); ++SI) { + if (!MO->clobbersPhysReg(*SI)) + return false; + } + } + return true; +} + +/// We assume the high bits of a physical super register are not preserved +/// unless the instruction has an implicit-use operand reading the +/// super-register or a register unit for the upper bits is available. +void LiveRegUnits::removeRegsInMask(const MachineOperand &Op, + const MCRegisterInfo &MCRI) { + SparseSet<unsigned>::iterator LUI = LiveUnits.begin(); + while (LUI != LiveUnits.end()) { + if (operClobbersUnit(&Op, *LUI, &MCRI)) + LUI = LiveUnits.erase(LUI); + else + ++LUI; + } +} + +void LiveRegUnits::stepBackward(const MachineInstr &MI, + const MCRegisterInfo &MCRI) { + // Remove defined registers and regmask kills from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + if (!O->isDef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + removeReg(Reg, MCRI); + } else if (O->isRegMask()) { + removeRegsInMask(*O, MCRI); + } + } + // Add uses to the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->readsReg() || O->isUndef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + addReg(Reg, MCRI); + } +} + +/// Uses with kill flag get removed from the set, defs added. If possible +/// use StepBackward() instead of this function because some kill flags may +/// be missing. +void LiveRegUnits::stepForward(const MachineInstr &MI, + const MCRegisterInfo &MCRI) { + SmallVector<unsigned, 4> Defs; + // Remove killed registers from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + if (O->isDef()) { + if (!O->isDead()) + Defs.push_back(Reg); + } else { + if (!O->isKill()) + continue; + assert(O->isUse()); + removeReg(Reg, MCRI); + } + } else if (O->isRegMask()) { + removeRegsInMask(*O, MCRI); + } + } + // Add defs to the set. + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + addReg(Defs[i], MCRI); + } +} + +/// Adds all registers in the live-in list of block @p BB. +void LiveRegUnits::addLiveIns(const MachineBasicBlock *MBB, + const MCRegisterInfo &MCRI) { + for (MachineBasicBlock::livein_iterator L = MBB->livein_begin(), + LE = MBB->livein_end(); L != LE; ++L) { + addReg(*L, MCRI); + } +} diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index 789eddc..ed55d7a 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -217,8 +217,8 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, continue; unsigned DefReg = MO.getReg(); if (TRI->isSubRegister(Reg, DefReg)) { - PartDefRegs.insert(DefReg); - for (MCSubRegIterator SubRegs(DefReg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) PartDefRegs.insert(*SubRegs); } } @@ -271,8 +271,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { true/*IsImp*/)); // Remember this use. - PhysRegUse[Reg] = MI; - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) PhysRegUse[*SubRegs] = MI; } @@ -350,8 +350,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { continue; } if (MachineInstr *Use = PhysRegUse[SubReg]) { - PartUses.insert(SubReg); - for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); SS.isValid(); + ++SS) PartUses.insert(*SS); unsigned Dist = DistanceMap[Use]; if (Dist > LastRefOrPartRefDist) { @@ -387,8 +387,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { LastSubRef->addRegisterKilled(SubReg, TRI, true); else { LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); - PhysRegUse[SubReg] = LastRefOrPartRef; - for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); + SS.isValid(); ++SS) PhysRegUse[*SS] = LastRefOrPartRef; } for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) @@ -441,12 +441,12 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) { } void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, - SmallVector<unsigned, 4> &Defs) { + SmallVectorImpl<unsigned> &Defs) { // What parts of the register are previously defined? SmallSet<unsigned, 32> Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { - Live.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) Live.insert(*SubRegs); } else { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { @@ -460,8 +460,8 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, if (Live.count(SubReg)) continue; if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { - Live.insert(SubReg); - for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); + SS.isValid(); ++SS) Live.insert(*SS); } } @@ -484,13 +484,12 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, } void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, - SmallVector<unsigned, 4> &Defs) { + SmallVectorImpl<unsigned> &Defs) { while (!Defs.empty()) { unsigned Reg = Defs.back(); Defs.pop_back(); - PhysRegDef[Reg] = MI; - PhysRegUse[Reg] = NULL; - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; PhysRegDef[SubReg] = MI; PhysRegUse[SubReg] = NULL; @@ -610,9 +609,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // if they have PHI nodes, and if so, we simulate an assignment at the end // of the current block. if (!PHIVarInfo[MBB->getNumber()].empty()) { - SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()]; + SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()]; - for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(), + for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(), E = VarInfoVec.end(); I != E; ++I) // Mark it alive only in the block we are representing. MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 91810bd..ca71e3b 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -52,7 +52,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); + const char *Prefix = Ctx.getAsmInfo()->getPrivateGlobalPrefix(); CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -861,7 +861,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LiveInterval &LI = LIS->getInterval(Reg); VNInfo *VNI = LI.getVNInfoAt(PrevIndex); assert(VNI && "PHI sources should be live out of their predecessors."); - LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI)); } } } @@ -880,9 +880,9 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (isLiveOut && isLastMBB) { VNInfo *VNI = LI.getVNInfoAt(PrevIndex); assert(VNI && "LiveInterval should have VNInfo where it is live."); - LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI)); } else if (!isLiveOut && !isLastMBB) { - LI.removeRange(StartIndex, EndIndex); + LI.removeSegment(StartIndex, EndIndex); } } diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 070daf2..e269d24 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -50,11 +50,6 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { return false; } -/// getblockFreq - Return block frequency. Return 0 if we don't have the -/// information. Please note that initial frequency is equal to 1024. It means -/// that we should not rely on the value itself, but only on the comparison to -/// the other block frequencies. We do this to avoid using of floating points. -/// BlockFrequency MachineBlockFrequencyInfo:: getBlockFreq(const MachineBasicBlock *MBB) const { return MBFI->getBlockFreq(MBB); diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index bfba503..4b0f7f3 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -991,6 +991,28 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // The "PrevBB" is not yet updated to reflect current code layout, so, + // o. it may fall-through to a block without explict "goto" instruction + // before layout, and no longer fall-through it after layout; or + // o. just opposite. + // + // AnalyzeBranch() may return erroneous value for FBB when these two + // situations take place. For the first scenario FBB is mistakenly set + // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, + // is mistakenly pointing to "*BI". + // + bool needUpdateBr = true; + if (!Cond.empty() && (!FBB || FBB == *BI)) { + PrevBB->updateTerminator(); + needUpdateBr = false; + Cond.clear(); + TBB = FBB = 0; + if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // FIXME: This should never take place. + TBB = FBB = 0; + } + } + // If PrevBB has a two-way branch, try to re-order the branches // such that we branch to the successor with higher weight first. if (TBB && !Cond.empty() && FBB && @@ -1003,8 +1025,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + needUpdateBr = true; } - PrevBB->updateTerminator(); + if (needUpdateBr) + PrevBB->updateTerminator(); } } diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 61d8d38..d228286 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -84,11 +84,11 @@ namespace { bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, @@ -193,7 +193,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &PhysUseDef) const{ // First, add all uses to PhysRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -244,7 +244,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs, + SmallVectorImpl<unsigned> &PhysDefs, bool &NonLocal) const { // For now conservatively returns false if the common subexpression is // not in the same basic block as the given instruction. The only exception diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index dc8a224..4f48e2c 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -213,9 +213,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { CopyMap.erase(*AI); AvailCopyMap.erase(*AI); } - CopyMap[Def] = MI; - AvailCopyMap[Def] = MI; - for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) { + for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid(); + ++SR) { CopyMap[*SR] = MI; AvailCopyMap[*SR] = MI; } diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index 04321f3..0703df0 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -54,23 +55,28 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, GCModuleInfo* gmi) : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) { if (TM.getRegisterInfo()) - RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo()); + RegInfo = new (Allocator) MachineRegisterInfo(TM); else RegInfo = 0; + MFInfo = 0; - FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(), - TM.Options.RealignStack); + FrameInfo = + new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack")); + if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getAttributes(). getStackAlignment(AttributeSet::FunctionIndex)); - ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout()); + + ConstantPool = new (Allocator) MachineConstantPool(TM); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); + // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) Alignment = std::max(Alignment, TM.getTargetLowering()->getPrefFunctionAlignment()); + FunctionNumber = FunctionNum; JumpTableInfo = 0; } @@ -456,11 +462,15 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const { // MachineFrameInfo implementation //===----------------------------------------------------------------------===// +const TargetFrameLowering *MachineFrameInfo::getFrameLowering() const { + return TM.getFrameLowering(); +} + /// ensureMaxAlignment - Make sure the function is at least Align bytes /// aligned. void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { - if (!TFI.isStackRealignable() || !RealignOption) - assert(Align <= TFI.getStackAlignment() && + if (!getFrameLowering()->isStackRealignable() || !RealignOption) + assert(Align <= getFrameLowering()->getStackAlignment() && "For targets without stack realignment, Align is out of limit!"); if (MaxAlignment < Align) MaxAlignment = Align; } @@ -482,8 +492,10 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, Alloca)); int Index = (int)Objects.size() - NumFixedObjects - 1; @@ -498,8 +510,10 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); CreateStackObject(Size, Alignment, true, false); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -513,8 +527,10 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, /// int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { HasVarSizedObjects = true; - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -532,10 +548,12 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // the incoming frame position. If the frame object is at offset 32 and // the stack is guaranteed to be 16-byte aligned, then we know that the // object is 16-byte aligned. - unsigned StackAlign = TFI.getStackAlignment(); + unsigned StackAlign = getFrameLowering()->getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); - Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Align, TFI.getStackAlignment()); + Align = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*NeedSP*/ false, @@ -769,6 +787,10 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } +const DataLayout *MachineConstantPool::getDataLayout() const { + return TM.getDataLayout(); +} + Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); @@ -850,7 +872,8 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, // FIXME, this could be made much more efficient for large constant pools. for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (!Constants[i].isMachineConstantPoolEntry() && - CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) { + CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, + getDataLayout())) { if ((unsigned)Constants[i].getAlignment() < Alignment) Constants[i].Alignment = Alignment; return i; @@ -887,7 +910,7 @@ void MachineConstantPool::print(raw_ostream &OS) const { if (Constants[i].isMachineConstantPoolEntry()) Constants[i].Val.MachineCPVal->print(OS); else - OS << *(const Value*)Constants[i].Val.ConstVal; + WriteAsOperand(OS, Constants[i].Val.ConstVal, /*PrintType=*/false); OS << ", align=" << Constants[i].getAlignment(); OS << "\n"; } diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 32d0668..295b450 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -647,12 +647,15 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { } } +#ifndef NDEBUG + bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata; // OpNo now points as the desired insertion point. Unless this is a variadic // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). // RegMask operands go between the explicit and implicit operands. assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || - OpNo < MCID->getNumOperands()) && + OpNo < MCID->getNumOperands() || isMetaDataOp) && "Trying to add an operand to a machine instr that is already done!"); +#endif MachineRegisterInfo *MRI = getRegInfo(); @@ -1253,32 +1256,6 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, return true; } -/// isSafeToReMat - Return true if it's safe to rematerialize the specified -/// instruction which defined the specified register instead of copying it. -bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, - AliasAnalysis *AA, - unsigned DstReg) const { - bool SawStore = false; - if (!TII->isTriviallyReMaterializable(this, AA) || - !isSafeToMove(TII, AA, SawStore)) - return false; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); - if (!MO.isReg()) - continue; - // FIXME: For now, do not remat any instruction with register operands. - // Later on, we can loosen the restriction is the register operands have - // not been modified between the def and use. Note, this is different from - // MachineSink because the code is no longer in two-address form (at least - // partially). - if (MO.isUse()) - return false; - else if (!MO.isDead() && MO.getReg() != DstReg) - return false; - } - return true; -} - /// hasOrderedMemoryRef - Return true if this instruction may have an ordered /// or volatile memory reference, or if the information describing the memory /// reference is not available. Return false if it is known to have no ordered @@ -1411,8 +1388,10 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, const LLVMContext &Ctx = MF->getFunction()->getContext(); if (!DL.isUnknown()) { // Print source line info. DIScope Scope(DL.getScope(Ctx)); + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); // Omit the directory, because it's likely to be long and uninteresting. - if (Scope.Verify()) + if (Scope) CommentOS << Scope.getFilename(); else CommentOS << "<unknown>"; @@ -1726,31 +1705,31 @@ void MachineInstr::clearRegisterKills(unsigned Reg, } } -bool MachineInstr::addRegisterDead(unsigned IncomingReg, +bool MachineInstr::addRegisterDead(unsigned Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg); bool hasAliases = isPhysReg && - MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); + MCRegAliasIterator(Reg, RegInfo, false).isValid(); bool Found = false; SmallVector<unsigned,4> DeadOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!Reg) + unsigned MOReg = MO.getReg(); + if (!MOReg) continue; - if (Reg == IncomingReg) { + if (MOReg == Reg) { MO.setIsDead(); Found = true; } else if (hasAliases && MO.isDead() && - TargetRegisterInfo::isPhysicalRegister(Reg)) { + TargetRegisterInfo::isPhysicalRegister(MOReg)) { // There exists a super-register that's marked dead. - if (RegInfo->isSuperRegister(IncomingReg, Reg)) + if (RegInfo->isSuperRegister(Reg, MOReg)) return true; - if (RegInfo->isSubRegister(IncomingReg, Reg)) + if (RegInfo->isSubRegister(Reg, MOReg)) DeadOps.push_back(i); } } @@ -1770,7 +1749,7 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, if (Found || !AddIfNotFound) return Found; - addOperand(MachineOperand::CreateReg(IncomingReg, + addOperand(MachineOperand::CreateReg(Reg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/, @@ -1778,21 +1757,21 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, return true; } -void MachineInstr::addRegisterDefined(unsigned IncomingReg, +void MachineInstr::addRegisterDefined(unsigned Reg, const TargetRegisterInfo *RegInfo) { - if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) { - MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + MachineOperand *MO = findRegisterDefOperand(Reg, false, RegInfo); if (MO) return; } else { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); - if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() && + if (MO.isReg() && MO.getReg() == Reg && MO.isDef() && MO.getSubReg() == 0) return; } } - addOperand(MachineOperand::CreateReg(IncomingReg, + addOperand(MachineOperand::CreateReg(Reg, true /*IsDef*/, true /*IsImp*/)); } diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index ed3ed4d..104eacd 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -172,7 +172,7 @@ namespace { BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, - SmallVector<CandidateInfo, 32> &Candidates); + SmallVectorImpl<CandidateInfo> &Candidates); /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the /// current loop. @@ -404,7 +404,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, - SmallVector<CandidateInfo, 32> &Candidates) { + SmallVectorImpl<CandidateInfo> &Candidates) { bool RuledOut = false; bool HasNonInvariantUse = false; unsigned Def = 0; @@ -468,12 +468,12 @@ void MachineLICM::ProcessMI(MachineInstr *MI, for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { if (PhysRegDefs.test(*AS)) PhysRegClobbers.set(*AS); - if (PhysRegClobbers.test(*AS)) - // MI defined register is seen defined by another instruction in - // the loop, it cannot be a LICM candidate. - RuledOut = true; PhysRegDefs.set(*AS); } + if (PhysRegClobbers.test(Reg)) + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. + RuledOut = true; } // Only consider reloads for now and remats which do not have register @@ -502,7 +502,7 @@ void MachineLICM::HoistRegionPostRA() { // Walk the entire region, count number of defs for each register, and // collect potential LICM candidates. - const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); + const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *BB = Blocks[i]; @@ -584,7 +584,7 @@ void MachineLICM::HoistRegionPostRA() { /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current /// loop, and make sure it is not killed by any instructions in the loop. void MachineLICM::AddToLiveIns(unsigned Reg) { - const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); + const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *BB = Blocks[i]; if (!BB->isLiveIn(Reg)) @@ -1084,7 +1084,7 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, return true; for (unsigned i = BackTrace.size(); i != 0; --i) { - SmallVector<unsigned, 8> &RP = BackTrace[i-1]; + SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; if (RP[RCId] + Cost >= Limit) return true; } @@ -1130,7 +1130,7 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { // Update register pressure of blocks from loop header to current block. for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { - SmallVector<unsigned, 8> &RP = BackTrace[i]; + SmallVectorImpl<unsigned> &RP = BackTrace[i]; for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { unsigned RCId = CI->first; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 8af9d05..bb54284 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -253,13 +253,12 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, const MCObjectFileInfo *MOFI) - : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) { + : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, 0, false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } MachineModuleInfo::MachineModuleInfo() - : ImmutablePass(ID), - Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) { + : ImmutablePass(ID), Context(0, 0, 0) { llvm_unreachable("This MachineModuleInfo constructor should never be called, " "MMI should always be explicitly constructed by " "LLVMTargetMachine"); @@ -303,7 +302,7 @@ bool MachineModuleInfo::doFinalization(Module &M) { /// void MachineModuleInfo::EndFunction() { // Clean up frame info. - FrameMoves.clear(); + FrameInstructions.clear(); // Clean up exception info. LandingPads.clear(); diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 68372f6..f8b8796 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -19,16 +19,21 @@ using namespace llvm; -MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) - : TRI(&TRI), IsSSA(true), TracksLiveness(true) { +// Pin the vtable to this file. +void MachineRegisterInfo::Delegate::anchor() {} + +MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM) + : TM(TM), TheDelegate(0), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedRegUnits.resize(TRI.getNumRegUnits()); - UsedPhysRegMask.resize(TRI.getNumRegs()); + UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); + UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs()); // Create the physreg use/def lists. - PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; - memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); + PhysRegUseDefLists = + new MachineOperand*[getTargetRegisterInfo()->getNumRegs()]; + memset(PhysRegUseDefLists, 0, + sizeof(MachineOperand*)*getTargetRegisterInfo()->getNumRegs()); } MachineRegisterInfo::~MachineRegisterInfo() { @@ -50,7 +55,8 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, const TargetRegisterClass *OldRC = getRegClass(Reg); if (OldRC == RC) return RC; - const TargetRegisterClass *NewRC = TRI->getCommonSubClass(OldRC, RC); + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getCommonSubClass(OldRC, RC); if (!NewRC || NewRC == OldRC) return NewRC; if (NewRC->getNumRegs() < MinNumRegs) @@ -63,7 +69,8 @@ bool MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); - const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); // Stop early if there is no room to grow. if (NewRC == OldRC) @@ -73,14 +80,16 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; ++I) { const TargetRegisterClass *OpRC = - I->getRegClassConstraint(I.getOperandNo(), TII, TRI); + I->getRegClassConstraint(I.getOperandNo(), TII, + getTargetRegisterInfo()); if (unsigned SubIdx = I.getOperand().getSubReg()) { if (OpRC) - NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx); + NewRC = getTargetRegisterInfo()->getMatchingSuperRegClass(NewRC, OpRC, + SubIdx); else - NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx); + NewRC = getTargetRegisterInfo()->getSubClassWithSubReg(NewRC, SubIdx); } else if (OpRC) - NewRC = TRI->getCommonSubClass(NewRC, OpRC); + NewRC = getTargetRegisterInfo()->getCommonSubClass(NewRC, OpRC); if (!NewRC || NewRC == OldRC) return false; } @@ -102,6 +111,8 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ VRegInfo.grow(Reg); VRegInfo[Reg].first = RegClass; RegAllocHints.grow(Reg); + if (TheDelegate) + TheDelegate->MRI_NoteNewVirtualRegister(Reg); return Reg; } @@ -126,24 +137,28 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { MachineOperand *MO = &I.getOperand(); MachineInstr *MI = MO->getParent(); if (!MI) { - errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO << " has no parent instruction.\n"; Valid = false; } MachineOperand *MO0 = &MI->getOperand(0); unsigned NumOps = MI->getNumOperands(); if (!(MO >= MO0 && MO < MO0+NumOps)) { - errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO << " doesn't belong to parent MI: " << *MI; Valid = false; } if (!MO->isReg()) { - errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " MachineOperand " << MO << ": " << *MO << " is not a register\n"; Valid = false; } if (MO->getReg() != Reg) { - errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": " + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use-list MachineOperand " << MO << ": " << *MO << " is the wrong register\n"; Valid = false; } @@ -156,7 +171,7 @@ void MachineRegisterInfo::verifyUseLists() const { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) verifyUseList(TargetRegisterInfo::index2VirtReg(i)); - for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) + for (unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i) verifyUseList(i); #endif } @@ -390,8 +405,8 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const { #endif void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { - ReservedRegs = TRI->getReservedRegs(MF); - assert(ReservedRegs.size() == TRI->getNumRegs() && + ReservedRegs = getTargetRegisterInfo()->getReservedRegs(MF); + assert(ReservedRegs.size() == getTargetRegisterInfo()->getNumRegs() && "Invalid ReservedRegs vector from target"); } @@ -401,7 +416,8 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, // Check if any overlapping register is modified, or allocatable so it may be // used later. - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) + for (MCRegAliasIterator AI(PhysReg, getTargetRegisterInfo(), true); + AI.isValid(); ++AI) if (!def_empty(*AI) || isAllocatable(*AI)) return false; return true; diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp index bb6aad7..17f0af8 100644 --- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -77,7 +77,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) { static unsigned LookForIdenticalPHI(MachineBasicBlock *BB, - SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) { + SmallVectorImpl<std::pair<MachineBasicBlock*, unsigned> > &PredValues) { if (BB->empty()) return 0; diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index fff6b2b..e71c4df 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDFS.h" @@ -30,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include <queue> using namespace llvm; @@ -51,10 +53,11 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// FIXME: remove this flag after initial testing. It should always be a good -// thing. -static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden, - cl::desc("Constrain vreg copies."), cl::init(true)); +static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden, + cl::desc("Enable register pressure scheduling."), cl::init(true)); + +static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden, + cl::desc("Enable cyclic critical path analysis."), cl::init(true)); static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -69,6 +72,10 @@ static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden, // DAG subtrees must have at least this many nodes. static const unsigned MinSubtreeSize = 8; +// Pin the vtables to this file. +void MachineSchedStrategy::anchor() {} +void ScheduleDAGMutation::anchor() {} + //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// @@ -98,6 +105,9 @@ public: virtual void print(raw_ostream &O, const Module* = 0) const; static char ID; // Class identification, replacement for typeinfo + +protected: + ScheduleDAGInstrs *createMachineScheduler(); }; } // namespace @@ -152,12 +162,13 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.", /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); +static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C); /// Decrement this iterator until reaching the top or a non-debug instr. -static MachineBasicBlock::iterator -priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { +static MachineBasicBlock::const_iterator +priorNonDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator Beg) { assert(I != Beg && "reached the top of the region, cannot decrement"); while (--I != Beg) { if (!I->isDebugValue()) @@ -166,10 +177,19 @@ priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { return I; } +/// Non-const version. +static MachineBasicBlock::iterator +priorNonDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator Beg) { + return const_cast<MachineInstr*>( + &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)); +} + /// If this iterator is a debug value, increment until reaching the End or a /// non-debug instruction. -static MachineBasicBlock::iterator -nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { +static MachineBasicBlock::const_iterator +nextIfDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator End) { for(; I != End; ++I) { if (!I->isDebugValue()) break; @@ -177,6 +197,34 @@ nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { return I; } +/// Non-const version. +static MachineBasicBlock::iterator +nextIfDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator End) { + // Cast the return value to nonconst MachineInstr, then cast to an + // instr_iterator, which does not check for null, finally return a + // bundle_iterator. + return MachineBasicBlock::instr_iterator( + const_cast<MachineInstr*>( + &*nextIfDebug(MachineBasicBlock::const_iterator(I), End))); +} + +/// Instantiate a ScheduleDAGInstrs that will be owned by the caller. +ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { + // Select the scheduler, or set the default. + MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; + if (Ctor != useDefaultMachineSched) + return Ctor(this); + + // Get the default scheduler set by the target for this function. + ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this); + if (Scheduler) + return Scheduler; + + // Default to GenericScheduler. + return createGenericSched(this); +} + /// Top-level MachineScheduler pass driver. /// /// Visit blocks in function order. Divide each block into scheduling regions @@ -207,23 +255,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); if (VerifyScheduling) { - DEBUG(LIS->print(dbgs())); + DEBUG(LIS->dump()); MF->verify(this, "Before machine scheduling."); } RegClassInfo->runOnMachineFunction(*MF); - // Select the scheduler, or set the default. - MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; - if (Ctor == useDefaultMachineSched) { - // Get the default scheduler set by the target. - Ctor = MachineSchedRegistry::getDefault(); - if (!Ctor) { - Ctor = createConvergingSched; - MachineSchedRegistry::setDefault(Ctor); - } - } - // Instantiate the selected scheduler. - OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this)); + // Instantiate the selected scheduler for this target, function, and + // optimization level. + OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); // Visit all machine basic blocks. // @@ -258,14 +297,15 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // The next region starts above the previous region. Look backward in the // instruction stream until we find the nearest boundary. + unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingInstrs) { + for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) { if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) break; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs); + Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). if (I == RegionEnd || I == llvm::prior(RegionEnd)) { @@ -280,7 +320,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; - dbgs() << " Remaining: " << RemainingInstrs << "\n"); + dbgs() << " RegionInstrs: " << NumRegionInstrs + << " Remaining: " << RemainingInstrs << "\n"); // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. @@ -297,7 +338,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { Scheduler->finishBlock(); } Scheduler->finalizeSchedule(); - DEBUG(LIS->print(dbgs())); + DEBUG(LIS->dump()); if (VerifyScheduling) MF->verify(this, "After machine scheduling."); return true; @@ -309,7 +350,7 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ReadyQueue::dump() { - dbgs() << " " << Name << ": "; + dbgs() << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; @@ -449,13 +490,19 @@ bool ScheduleDAGMI::checkSchedLimit() { void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount) + unsigned regioninstrs) { - ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); // For convenience remember the end of the liveness region. LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); + + SUPressureDiffs.clear(); + + SchedImpl->initPolicy(begin, end, regioninstrs); + + ShouldTrackPressure = SchedImpl->shouldTrackPressure(); } // Setup the register pressure trackers for the top scheduled top and bottom @@ -467,7 +514,7 @@ void ScheduleDAGMI::initRegPressure() { // Close the RPTracker to finalize live ins. RPTracker.closeRegion(); - DEBUG(RPTracker.getPressure().dump(TRI)); + DEBUG(RPTracker.dump()); // Initialize the live ins and live outs. TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); @@ -479,9 +526,23 @@ void ScheduleDAGMI::initRegPressure() { TopRPTracker.closeTop(); BotRPTracker.closeBottom(); + BotRPTracker.initLiveThru(RPTracker); + if (!BotRPTracker.getLiveThru().empty()) { + TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); + DEBUG(dbgs() << "Live Thru: "; + dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); + }; + + // For each live out vreg reduce the pressure change associated with other + // uses of the same vreg below the live-out reaching def. + updatePressureDiffs(RPTracker.getPressure().LiveOutRegs); + // Account for liveness generated by the region boundary. - if (LiveRegionEnd != RegionEnd) - BotRPTracker.recede(); + if (LiveRegionEnd != RegionEnd) { + SmallVector<unsigned, 8> LiveUses; + BotRPTracker.recede(&LiveUses); + updatePressureDiffs(LiveUses); + } assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); @@ -491,38 +552,88 @@ void ScheduleDAGMI::initRegPressure() { const std::vector<unsigned> &RegionPressure = RPTracker.getPressure().MaxSetPressure; for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { - unsigned Limit = TRI->getRegPressureSetLimit(i); - DEBUG(dbgs() << TRI->getRegPressureSetName(i) - << "Limit " << Limit - << " Actual " << RegionPressure[i] << "\n"); - if (RegionPressure[i] > Limit) - RegionCriticalPSets.push_back(PressureElement(i, 0)); + unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); + if (RegionPressure[i] > Limit) { + DEBUG(dbgs() << TRI->getRegPressureSetName(i) + << " Limit " << Limit + << " Actual " << RegionPressure[i] << "\n"); + RegionCriticalPSets.push_back(PressureChange(i)); + } } DEBUG(dbgs() << "Excess PSets: "; for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) dbgs() << TRI->getRegPressureSetName( - RegionCriticalPSets[i].PSetID) << " "; + RegionCriticalPSets[i].getPSet()) << " "; dbgs() << "\n"); } -// FIXME: When the pressure tracker deals in pressure differences then we won't -// iterate over all RegionCriticalPSets[i]. void ScheduleDAGMI:: -updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) { - for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { - unsigned ID = RegionCriticalPSets[i].PSetID; - int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; - if ((int)NewMaxPressure[ID] > MaxUnits) - MaxUnits = NewMaxPressure[ID]; +updateScheduledPressure(const SUnit *SU, + const std::vector<unsigned> &NewMaxPressure) { + const PressureDiff &PDiff = getPressureDiff(SU); + unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); + for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end(); + I != E; ++I) { + if (!I->isValid()) + break; + unsigned ID = I->getPSet(); + while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) + ++CritIdx; + if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { + if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc() + && NewMaxPressure[ID] <= INT16_MAX) + RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]); + } + unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); + if (NewMaxPressure[ID] >= Limit - 2) { + DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " + << NewMaxPressure[ID] << " > " << Limit << "(+ " + << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); + } } - DEBUG( - for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) { - unsigned Limit = TRI->getRegPressureSetLimit(i); - if (NewMaxPressure[i] > Limit ) { - dbgs() << " " << TRI->getRegPressureSetName(i) << ": " - << NewMaxPressure[i] << " > " << Limit << "\n"; +} + +/// Update the PressureDiff array for liveness after scheduling this +/// instruction. +void ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { + for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) { + /// FIXME: Currently assuming single-use physregs. + unsigned Reg = LiveUses[LUIdx]; + DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); + if (!TRI->isVirtualRegister(Reg)) + continue; + + // This may be called before CurrentBottom has been initialized. However, + // BotRPTracker must have a valid position. We want the value live into the + // instruction or live out of the block, so ask for the previous + // instruction's live-out. + const LiveInterval &LI = LIS->getInterval(Reg); + VNInfo *VNI; + MachineBasicBlock::const_iterator I = + nextIfDebug(BotRPTracker.getPos(), BB->end()); + if (I == BB->end()) + VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + else { + LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I)); + VNI = LRQ.valueIn(); + } + // RegisterPressureTracker guarantees that readsReg is true for LiveUses. + assert(VNI && "No live value at use."); + for (VReg2UseMap::iterator + UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { + SUnit *SU = UI->SU; + DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr()); + // If this use comes before the reaching def, it cannot be a last use, so + // descrease its pressure change. + if (!SU->isScheduled && SU != &ExitSU) { + LiveQueryResult LRQ + = LI.Query(LIS->getInstructionIndex(SU->getInstr())); + if (LRQ.valueIn() == VNI) + getPressureDiff(SU).addPressureChange(Reg, true, &MRI); } - }); + } + } } /// schedule - Called back from MachineScheduler::runOnMachineFunction @@ -580,15 +691,23 @@ void ScheduleDAGMI::schedule() { /// Build the DAG and setup three register pressure trackers. void ScheduleDAGMI::buildDAGWithRegPressure() { + if (!ShouldTrackPressure) { + RPTracker.reset(); + RegionCriticalPSets.clear(); + buildSchedGraph(AA); + return; + } + // Initialize the register pressure tracker used by buildSchedGraph. - RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, + /*TrackUntiedDefs=*/true); // Account for liveness generate by the region boundary. if (LiveRegionEnd != RegionEnd) RPTracker.recede(); // Build the DAG, and compute current register pressure. - buildSchedGraph(AA, &RPTracker); + buildSchedGraph(AA, &RPTracker, &SUPressureDiffs); // Initialize top/bottom trackers after computing region pressure. initRegPressure(); @@ -631,6 +750,91 @@ void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, ExitSU.biasCriticalPath(); } +/// Compute the max cyclic critical path through the DAG. The scheduling DAG +/// only provides the critical path for single block loops. To handle loops that +/// span blocks, we could use the vreg path latencies provided by +/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently +/// available for use in the scheduler. +/// +/// The cyclic path estimation identifies a def-use pair that crosses the back +/// edge and considers the depth and height of the nodes. For example, consider +/// the following instruction sequence where each instruction has unit latency +/// and defines an epomymous virtual register: +/// +/// a->b(a,c)->c(b)->d(c)->exit +/// +/// The cyclic critical path is a two cycles: b->c->b +/// The acyclic critical path is four cycles: a->b->c->d->exit +/// LiveOutHeight = height(c) = len(c->d->exit) = 2 +/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3 +/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4 +/// LiveInDepth = depth(b) = len(a->b) = 1 +/// +/// LiveOutDepth - LiveInDepth = 3 - 1 = 2 +/// LiveInHeight - LiveOutHeight = 4 - 2 = 2 +/// CyclicCriticalPath = min(2, 2) = 2 +unsigned ScheduleDAGMI::computeCyclicCriticalPath() { + // This only applies to single block loop. + if (!BB->isSuccessor(BB)) + return 0; + + unsigned MaxCyclicLatency = 0; + // Visit each live out vreg def to find def/use pairs that cross iterations. + ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs; + for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end(); + RI != RE; ++RI) { + unsigned Reg = *RI; + if (!TRI->isVirtualRegister(Reg)) + continue; + const LiveInterval &LI = LIS->getInterval(Reg); + const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + if (!DefVNI) + continue; + + MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def); + const SUnit *DefSU = getSUnit(DefMI); + if (!DefSU) + continue; + + unsigned LiveOutHeight = DefSU->getHeight(); + unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; + // Visit all local users of the vreg def. + for (VReg2UseMap::iterator + UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { + if (UI->SU == &ExitSU) + continue; + + // Only consider uses of the phi. + LiveQueryResult LRQ = + LI.Query(LIS->getInstructionIndex(UI->SU->getInstr())); + if (!LRQ.valueIn()->isPHIDef()) + continue; + + // Assume that a path spanning two iterations is a cycle, which could + // overestimate in strange cases. This allows cyclic latency to be + // estimated as the minimum slack of the vreg's depth or height. + unsigned CyclicLatency = 0; + if (LiveOutDepth > UI->SU->getDepth()) + CyclicLatency = LiveOutDepth - UI->SU->getDepth(); + + unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency; + if (LiveInHeight > LiveOutHeight) { + if (LiveInHeight - LiveOutHeight < CyclicLatency) + CyclicLatency = LiveInHeight - LiveOutHeight; + } + else + CyclicLatency = 0; + + DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" + << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n"); + if (CyclicLatency > MaxCyclicLatency) + MaxCyclicLatency = CyclicLatency; + } + } + DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n"); + return MaxCyclicLatency; +} + /// Identify DAG roots and setup scheduler queues. void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots) { @@ -658,11 +862,13 @@ void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, SchedImpl->registerRoots(); // Advance past initial DebugValues. - assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); CurrentTop = nextIfDebug(RegionBegin, RegionEnd); - TopRPTracker.setPos(CurrentTop); - CurrentBottom = RegionEnd; + + if (ShouldTrackPressure) { + assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); + TopRPTracker.setPos(CurrentTop); + } } /// Move an instruction and update register pressure. @@ -679,10 +885,12 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { TopRPTracker.setPos(MI); } - // Update top scheduled pressure. - TopRPTracker.advance(); - assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); - updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); + if (ShouldTrackPressure) { + // Update top scheduled pressure. + TopRPTracker.advance(); + assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); + } } else { assert(SU->isBottomReady() && "node still has unscheduled dependencies"); @@ -698,10 +906,14 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { moveInstruction(MI, CurrentBottom); CurrentBottom = MI; } - // Update bottom scheduled pressure. - BotRPTracker.recede(); - assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); - updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); + if (ShouldTrackPressure) { + // Update bottom scheduled pressure. + SmallVector<unsigned, 8> LiveUses; + BotRPTracker.recede(&LiveUses); + assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); + updatePressureDiffs(LiveUses); + } } } @@ -1019,6 +1231,12 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { GlobalSegment->start)) { return; } + // If the prior global segment may be defined by the same two-address + // instruction that also defines LocalLI, then can't make a hole here. + if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start, + LocalLI->beginIndex())) { + return; + } // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise // it would be a disconnected component in the live range. assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() && @@ -1101,24 +1319,23 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) { } //===----------------------------------------------------------------------===// -// ConvergingScheduler - Implementation of the standard MachineSchedStrategy. +// GenericScheduler - Implementation of the generic MachineSchedStrategy. //===----------------------------------------------------------------------===// namespace { -/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// GenericScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. -class ConvergingScheduler : public MachineSchedStrategy { +class GenericScheduler : public MachineSchedStrategy { public: /// Represent the type of SchedCandidate found within a single queue. /// pickNodeBidirectional depends on these listed by decreasing priority. enum CandReason { - NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak, + NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, - TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse, - NodeOrder}; + TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; #ifndef NDEBUG - static const char *getReasonStr(ConvergingScheduler::CandReason Reason); + static const char *getReasonStr(GenericScheduler::CandReason Reason); #endif /// Policy for scheduling the next instruction in the candidate's zone. @@ -1149,7 +1366,7 @@ public: } }; - /// Store the state used by ConvergingScheduler heuristics, required for the + /// Store the state used by GenericScheduler heuristics, required for the /// lifetime of one invocation of pickNode(). struct SchedCandidate { CandPolicy Policy; @@ -1160,6 +1377,9 @@ public: // The reason for this candidate. CandReason Reason; + // Set of reasons that apply to multiple candidates. + uint32_t RepeatReasonSet; + // Register pressure values for the best candidate. RegPressureDelta RPDelta; @@ -1167,7 +1387,7 @@ public: SchedResourceDelta ResDelta; SchedCandidate(const CandPolicy &policy) - : Policy(policy), SU(NULL), Reason(NoCand) {} + : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {} bool isValid() const { return SU; } @@ -1180,6 +1400,9 @@ public: ResDelta = Best.ResDelta; } + bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } + void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } + void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); }; @@ -1188,33 +1411,27 @@ public: struct SchedRemainder { // Critical path through the DAG in expected latency. unsigned CriticalPath; + unsigned CyclicCritPath; + + // Scaled count of micro-ops left to schedule. + unsigned RemIssueCount; + + bool IsAcyclicLatencyLimited; // Unscheduled resources SmallVector<unsigned, 16> RemainingCounts; - // Critical resource for the unscheduled zone. - unsigned CritResIdx; - // Number of micro-ops left to schedule. - unsigned RemainingMicroOps; void reset() { CriticalPath = 0; + CyclicCritPath = 0; + RemIssueCount = 0; + IsAcyclicLatencyLimited = false; RemainingCounts.clear(); - CritResIdx = 0; - RemainingMicroOps = 0; } SchedRemainder() { reset(); } void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); - - unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const { - if (!SchedModel->hasInstrSchedModel()) - return 0; - - return std::max( - RemainingMicroOps * SchedModel->getMicroOpFactor(), - RemainingCounts[CritResIdx]); - } }; /// Each Scheduling boundary is associated with ready queues. It tracks the @@ -1235,8 +1452,13 @@ public: ScheduleHazardRecognizer *HazardRec; + /// Number of cycles it takes to issue the instructions scheduled in this + /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. + /// See getStalls(). unsigned CurrCycle; - unsigned IssueCount; + + /// Micro-ops issued in the current cycle + unsigned CurrMOps; /// MinReadyCycle - Cycle of the soonest available instruction. unsigned MinReadyCycle; @@ -1244,52 +1466,71 @@ public: // The expected latency of the critical path in this scheduled zone. unsigned ExpectedLatency; - // Resources used in the scheduled zone beyond this boundary. - SmallVector<unsigned, 16> ResourceCounts; + // The latency of dependence chains leading into this zone. + // For each node scheduled bottom-up: DLat = max DLat, N.Depth. + // For each cycle scheduled: DLat -= 1. + unsigned DependentLatency; + + /// Count the scheduled (issued) micro-ops that can be retired by + /// time=CurrCycle assuming the first scheduled instr is retired at time=0. + unsigned RetiredMOps; + + // Count scheduled resources that have been executed. Resources are + // considered executed if they become ready in the time that it takes to + // saturate any resource including the one in question. Counts are scaled + // for direct comparison with other resources. Counts can be compared with + // MOps * getMicroOpFactor and Latency * getLatencyFactor. + SmallVector<unsigned, 16> ExecutedResCounts; + + /// Cache the max count for a single resource. + unsigned MaxExecutedResCount; // Cache the critical resources ID in this scheduled zone. - unsigned CritResIdx; + unsigned ZoneCritResIdx; // Is the scheduled region resource limited vs. latency limited. bool IsResourceLimited; - unsigned ExpectedCount; - #ifndef NDEBUG - // Remember the greatest min operand latency. - unsigned MaxMinLatency; + // Remember the greatest operand latency as an upper bound on the number of + // times we should retry the pending queue because of a hazard. + unsigned MaxObservedLatency; #endif void reset() { // A new HazardRec is created for each DAG and owned by SchedBoundary. - delete HazardRec; - + // Destroying and reconstructing it is very expensive though. So keep + // invalid, placeholder HazardRecs. + if (HazardRec && HazardRec->isEnabled()) { + delete HazardRec; + HazardRec = 0; + } Available.clear(); Pending.clear(); CheckPending = false; NextSUs.clear(); - HazardRec = 0; CurrCycle = 0; - IssueCount = 0; + CurrMOps = 0; MinReadyCycle = UINT_MAX; ExpectedLatency = 0; - ResourceCounts.resize(1); - assert(!ResourceCounts[0] && "nonzero count for bad resource"); - CritResIdx = 0; + DependentLatency = 0; + RetiredMOps = 0; + MaxExecutedResCount = 0; + ZoneCritResIdx = 0; IsResourceLimited = false; - ExpectedCount = 0; #ifndef NDEBUG - MaxMinLatency = 0; + MaxObservedLatency = 0; #endif // Reserve a zero-count for invalid CritResIdx. - ResourceCounts.resize(1); + ExecutedResCounts.resize(1); + assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); } /// Pending queues extend the ready queues with the same ID and the /// PendingFlag set. SchedBoundary(unsigned ID, const Twine &Name): DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), - Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), + Pending(ID << GenericScheduler::LogMaxQID, Name+".P"), HazardRec(0) { reset(); } @@ -1300,28 +1541,63 @@ public: SchedRemainder *rem); bool isTop() const { - return Available.getID() == ConvergingScheduler::TopQID; + return Available.getID() == GenericScheduler::TopQID; + } + +#ifndef NDEBUG + const char *getResourceName(unsigned PIdx) { + if (!PIdx) + return "MOps"; + return SchedModel->getProcResource(PIdx)->Name; + } +#endif + + /// Get the number of latency cycles "covered" by the scheduled + /// instructions. This is the larger of the critical path within the zone + /// and the number of cycles required to issue the instructions. + unsigned getScheduledLatency() const { + return std::max(ExpectedLatency, CurrCycle); } unsigned getUnscheduledLatency(SUnit *SU) const { - if (isTop()) - return SU->getHeight(); - return SU->getDepth() + SU->Latency; + return isTop() ? SU->getHeight() : SU->getDepth(); + } + + unsigned getResourceCount(unsigned ResIdx) const { + return ExecutedResCounts[ResIdx]; } + /// Get the scaled count of scheduled micro-ops and resources, including + /// executed resources. unsigned getCriticalCount() const { - return ResourceCounts[CritResIdx]; + if (!ZoneCritResIdx) + return RetiredMOps * SchedModel->getMicroOpFactor(); + return getResourceCount(ZoneCritResIdx); + } + + /// Get a scaled count for the minimum execution time of the scheduled + /// micro-ops that are ready to execute by getExecutedCount. Notice the + /// feedback loop. + unsigned getExecutedCount() const { + return std::max(CurrCycle * SchedModel->getLatencyFactor(), + MaxExecutedResCount); } bool checkHazard(SUnit *SU); - void setLatencyPolicy(CandPolicy &Policy); + unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); + + unsigned getOtherResourceCount(unsigned &OtherCritIdx); + + void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone); void releaseNode(SUnit *SU, unsigned ReadyCycle); - void bumpCycle(); + void bumpCycle(unsigned NextCycle); - void countResource(unsigned PIdx, unsigned Cycles); + void incExecutedResources(unsigned PIdx, unsigned Count); + + unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); void bumpNode(SUnit *SU); @@ -1330,9 +1606,14 @@ public: void removeReady(SUnit *SU); SUnit *pickOnlyChoice(); + +#ifndef NDEBUG + void dumpScheduledState(); +#endif }; private: + const MachineSchedContext *Context; ScheduleDAGMI *DAG; const TargetSchedModel *SchedModel; const TargetRegisterInfo *TRI; @@ -1342,6 +1623,7 @@ private: SchedBoundary Top; SchedBoundary Bot; + MachineSchedPolicy RegionPolicy; public: /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) enum { @@ -1350,8 +1632,15 @@ public: LogMaxQID = 2 }; - ConvergingScheduler(): - DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + GenericScheduler(const MachineSchedContext *C): + Context(C), DAG(0), SchedModel(0), TRI(0), + Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + + virtual void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs); + + bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; } virtual void initialize(ScheduleDAGMI *dag); @@ -1366,14 +1655,7 @@ public: virtual void registerRoots(); protected: - void balanceZones( - ConvergingScheduler::SchedBoundary &CriticalZone, - ConvergingScheduler::SchedCandidate &CriticalCand, - ConvergingScheduler::SchedBoundary &OppositeZone, - ConvergingScheduler::SchedCandidate &OppositeCand); - - void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, - ConvergingScheduler::SchedCandidate &BotCand); + void checkAcyclicLatency(); void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, @@ -1395,7 +1677,7 @@ protected: }; } // namespace -void ConvergingScheduler::SchedRemainder:: +void GenericScheduler::SchedRemainder:: init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { reset(); if (!SchedModel->hasInstrSchedModel()) @@ -1404,7 +1686,8 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { for (std::vector<SUnit>::iterator I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); - RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); + RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) + * SchedModel->getMicroOpFactor(); for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { @@ -1413,26 +1696,61 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { RemainingCounts[PIdx] += (Factor * PI->Cycles); } } - for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds(); - PIdx != PEnd; ++PIdx) { - if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx]) - >= (int)SchedModel->getLatencyFactor()) { - CritResIdx = PIdx; - } - } } -void ConvergingScheduler::SchedBoundary:: +void GenericScheduler::SchedBoundary:: init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { reset(); DAG = dag; SchedModel = smodel; Rem = rem; if (SchedModel->hasInstrSchedModel()) - ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); + ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); +} + +/// Initialize the per-region scheduling policy. +void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + const TargetMachine &TM = Context->MF->getTarget(); + + // Avoid setting up the register pressure tracker for small regions to save + // compile time. As a rough heuristic, only track pressure when the number of + // schedulable instructions exceeds half the integer register file. + unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( + TM.getTargetLowering()->getRegClassFor(MVT::i32)); + + RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); + + // For generic targets, we default to bottom-up, because it's simpler and more + // compile-time optimizations have been implemented in that direction. + RegionPolicy.OnlyBottomUp = true; + + // Allow the subtarget to override default policy. + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); + + // After subtarget overrides, apply command line options. + if (!EnableRegPressure) + RegionPolicy.ShouldTrackPressure = false; + + // Check -misched-topdown/bottomup can force or unforce scheduling direction. + // e.g. -misched-bottomup=false allows scheduling in both directions. + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + if (ForceBottomUp.getNumOccurrences() > 0) { + RegionPolicy.OnlyBottomUp = ForceBottomUp; + if (RegionPolicy.OnlyBottomUp) + RegionPolicy.OnlyTopDown = false; + } + if (ForceTopDown.getNumOccurrences() > 0) { + RegionPolicy.OnlyTopDown = ForceTopDown; + if (RegionPolicy.OnlyTopDown) + RegionPolicy.OnlyBottomUp = false; + } } -void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { +void GenericScheduler::initialize(ScheduleDAGMI *dag) { DAG = dag; SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; @@ -1447,31 +1765,36 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { // are disabled, then these HazardRecs will be disabled. const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); const TargetMachine &TM = DAG->MF.getTarget(); - Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); + if (!Top.HazardRec) { + Top.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } + if (!Bot.HazardRec) { + Bot.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } } -void ConvergingScheduler::releaseTopNode(SUnit *SU) { +void GenericScheduler::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + if (I->isWeak()) + continue; unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned Latency = I->getLatency(); #ifndef NDEBUG - Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); + Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency); #endif - if (SU->TopReadyCycle < PredReadyCycle + MinLatency) - SU->TopReadyCycle = PredReadyCycle + MinLatency; + if (SU->TopReadyCycle < PredReadyCycle + Latency) + SU->TopReadyCycle = PredReadyCycle + Latency; } Top.releaseNode(SU, SU->TopReadyCycle); } -void ConvergingScheduler::releaseBottomNode(SUnit *SU) { +void GenericScheduler::releaseBottomNode(SUnit *SU) { if (SU->isScheduled) return; @@ -1482,18 +1805,56 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { if (I->isWeak()) continue; unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned Latency = I->getLatency(); #ifndef NDEBUG - Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); + Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency); #endif - if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) - SU->BotReadyCycle = SuccReadyCycle + MinLatency; + if (SU->BotReadyCycle < SuccReadyCycle + Latency) + SU->BotReadyCycle = SuccReadyCycle + Latency; } Bot.releaseNode(SU, SU->BotReadyCycle); } -void ConvergingScheduler::registerRoots() { +/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic +/// critical path by more cycles than it takes to drain the instruction buffer. +/// We estimate an upper bounds on in-flight instructions as: +/// +/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) +/// InFlightIterations = AcyclicPath / CyclesPerIteration +/// InFlightResources = InFlightIterations * LoopResources +/// +/// TODO: Check execution resources in addition to IssueCount. +void GenericScheduler::checkAcyclicLatency() { + if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) + return; + + // Scaled number of cycles per loop iteration. + unsigned IterCount = + std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), + Rem.RemIssueCount); + // Scaled acyclic critical path. + unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); + // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop + unsigned InFlightCount = + (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; + unsigned BufferLimit = + SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); + + Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; + + DEBUG(dbgs() << "IssueCycles=" + << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " + << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() + << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount + << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() + << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; + if (Rem.IsAcyclicLatencyLimited) + dbgs() << " ACYCLIC LATENCY LIMIT\n"); +} + +void GenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); + // Some roots may not feed into ExitSU. Check all of them in case. for (std::vector<SUnit*>::const_iterator I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { @@ -1501,6 +1862,11 @@ void ConvergingScheduler::registerRoots() { Rem.CriticalPath = (*I)->getDepth(); } DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + + if (EnableCyclicPath) { + Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); + checkAcyclicLatency(); + } } /// Does this SU have a hazard within the current instruction group. @@ -1516,12 +1882,12 @@ void ConvergingScheduler::registerRoots() { /// can dispatch per cycle. /// /// TODO: Also check whether the SU must start a new group. -bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { +bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) { if (HazardRec->isEnabled()) return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); - if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) { + if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; @@ -1529,45 +1895,125 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { return false; } -/// Compute the remaining latency to determine whether ILP should be increased. -void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { - // FIXME: compile time. In all, we visit four queues here one we should only - // need to visit the one that was last popped if we cache the result. +// Find the unscheduled node in ReadySUs with the highest latency. +unsigned GenericScheduler::SchedBoundary:: +findMaxLatency(ArrayRef<SUnit*> ReadySUs) { + SUnit *LateSU = 0; unsigned RemLatency = 0; - for (ReadyQueue::iterator I = Available.begin(), E = Available.end(); + for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); I != E; ++I) { unsigned L = getUnscheduledLatency(*I); - DEBUG(dbgs() << " " << Available.getName() - << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n'); - if (L > RemLatency) + if (L > RemLatency) { RemLatency = L; + LateSU = *I; + } } - for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end(); - I != E; ++I) { - unsigned L = getUnscheduledLatency(*I); - if (L > RemLatency) - RemLatency = L; + if (LateSU) { + DEBUG(dbgs() << Available.getName() << " RemLatency SU(" + << LateSU->NodeNum << ") " << RemLatency << "c\n"); } - unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); - DEBUG(dbgs() << " " << Available.getName() - << " ExpectedLatency " << ExpectedLatency - << " CP Limit " << CriticalPathLimit << '\n'); - if (RemLatency + ExpectedLatency >= CriticalPathLimit - && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { - Policy.ReduceLatency = true; - DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n'); + return RemLatency; +} + +// Count resources in this zone and the remaining unscheduled +// instruction. Return the max count, scaled. Set OtherCritIdx to the critical +// resource index, or zero if the zone is issue limited. +unsigned GenericScheduler::SchedBoundary:: +getOtherResourceCount(unsigned &OtherCritIdx) { + OtherCritIdx = 0; + if (!SchedModel->hasInstrSchedModel()) + return 0; + + unsigned OtherCritCount = Rem->RemIssueCount + + (RetiredMOps * SchedModel->getMicroOpFactor()); + DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " + << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); + for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); + PIdx != PEnd; ++PIdx) { + unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; + if (OtherCount > OtherCritCount) { + OtherCritCount = OtherCount; + OtherCritIdx = PIdx; + } + } + if (OtherCritIdx) { + DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " + << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) + << " " << getResourceName(OtherCritIdx) << "\n"); } + return OtherCritCount; } -void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, - unsigned ReadyCycle) { +/// Set the CandPolicy for this zone given the current resources and latencies +/// inside and outside the zone. +void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy, + SchedBoundary &OtherZone) { + // Now that potential stalls have been considered, apply preemptive heuristics + // based on the the total latency and resources inside and outside this + // zone. + + // Compute remaining latency. We need this both to determine whether the + // overall schedule has become latency-limited and whether the instructions + // outside this zone are resource or latency limited. + // + // The "dependent" latency is updated incrementally during scheduling as the + // max height/depth of scheduled nodes minus the cycles since it was + // scheduled: + // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone + // + // The "independent" latency is the max ready queue depth: + // ILat = max N.depth for N in Available|Pending + // + // RemainingLatency is the greater of independent and dependent latency. + unsigned RemLatency = DependentLatency; + RemLatency = std::max(RemLatency, findMaxLatency(Available.elements())); + RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements())); + + // Compute the critical resource outside the zone. + unsigned OtherCritIdx; + unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx); + + bool OtherResLimited = false; + if (SchedModel->hasInstrSchedModel()) { + unsigned LFactor = SchedModel->getLatencyFactor(); + OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; + } + if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) { + Policy.ReduceLatency |= true; + DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency " + << RemLatency << " + " << CurrCycle << "c > CritPath " + << Rem->CriticalPath << "\n"); + } + // If the same resource is limiting inside and outside the zone, do nothing. + if (ZoneCritResIdx == OtherCritIdx) + return; + DEBUG( + if (IsResourceLimited) { + dbgs() << " " << Available.getName() << " ResourceLimited: " + << getResourceName(ZoneCritResIdx) << "\n"; + } + if (OtherResLimited) + dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n"; + if (!IsResourceLimited && !OtherResLimited) + dbgs() << " Latency limited both directions.\n"); + + if (IsResourceLimited && !Policy.ReduceResIdx) + Policy.ReduceResIdx = ZoneCritResIdx; + + if (OtherResLimited) + Policy.DemandResIdx = OtherCritIdx; +} + +void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU, + unsigned ReadyCycle) { if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; // Check for interlocks first. For the purpose of other heuristics, an // instruction that cannot issue appears as if it's not in the ReadyQueue. - if (ReadyCycle > CurrCycle || checkHazard(SU)) + bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; + if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) Pending.push(SU); else Available.push(SU); @@ -1577,16 +2023,21 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, } /// Move the boundary of scheduled code by one cycle. -void ConvergingScheduler::SchedBoundary::bumpCycle() { - unsigned Width = SchedModel->getIssueWidth(); - IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; - - unsigned NextCycle = CurrCycle + 1; - assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); - if (MinReadyCycle > NextCycle) { - IssueCount = 0; - NextCycle = MinReadyCycle; - } +void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { + if (SchedModel->getMicroOpBufferSize() == 0) { + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + if (MinReadyCycle > NextCycle) + NextCycle = MinReadyCycle; + } + // Update the current micro-ops, which will issue in the next cycle. + unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); + CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; + + // Decrement DependentLatency based on the next cycle. + if ((NextCycle - CurrCycle) > DependentLatency) + DependentLatency = 0; + else + DependentLatency -= (NextCycle - CurrCycle); if (!HazardRec->isEnabled()) { // Bypass HazardRec virtual calls. @@ -1602,38 +2053,54 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() { } } CheckPending = true; - IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); + unsigned LFactor = SchedModel->getLatencyFactor(); + IsResourceLimited = + (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) + > (int)LFactor; + + DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); +} - DEBUG(dbgs() << " " << Available.getName() - << " Cycle: " << CurrCycle << '\n'); +void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx, + unsigned Count) { + ExecutedResCounts[PIdx] += Count; + if (ExecutedResCounts[PIdx] > MaxExecutedResCount) + MaxExecutedResCount = ExecutedResCounts[PIdx]; } /// Add the given processor resource to this scheduled zone. -void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, - unsigned Cycles) { +/// +/// \param Cycles indicates the number of consecutive (non-pipelined) cycles +/// during which this resource is consumed. +/// +/// \return the next cycle at which the instruction may execute without +/// oversubscribing resources. +unsigned GenericScheduler::SchedBoundary:: +countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); - DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name - << " +(" << Cycles << "x" << Factor - << ") / " << SchedModel->getLatencyFactor() << '\n'); - unsigned Count = Factor * Cycles; - ResourceCounts[PIdx] += Count; + DEBUG(dbgs() << " " << getResourceName(PIdx) + << " +" << Cycles << "x" << Factor << "u\n"); + + // Update Executed resources counts. + incExecutedResources(PIdx, Count); assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); Rem->RemainingCounts[PIdx] -= Count; - // Check if this resource exceeds the current critical resource by a full - // cycle. If so, it becomes the critical resource. - if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) - >= (int)SchedModel->getLatencyFactor()) { - CritResIdx = PIdx; + // Check if this resource exceeds the current critical resource. If so, it + // becomes the critical resource. + if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { + ZoneCritResIdx = PIdx; DEBUG(dbgs() << " *** Critical resource " - << SchedModel->getProcResource(PIdx)->Name << " x" - << ResourceCounts[PIdx] << '\n'); + << getResourceName(PIdx) << ": " + << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); } + // TODO: We don't yet model reserved resources. It's not hard though. + return CurrCycle; } /// Move the boundary of scheduled code by one SUnit. -void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { +void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { // Update the reservation table. if (HazardRec->isEnabled()) { if (!isTop() && SU->isCall) { @@ -1643,51 +2110,108 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { } HazardRec->EmitInstruction(SU); } + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); + CurrMOps += IncMOps; + // checkHazard prevents scheduling multiple instructions per cycle that exceed + // issue width. However, we commonly reach the maximum. In this case + // opportunistically bump the cycle to avoid uselessly checking everything in + // the readyQ. Furthermore, a single instruction may produce more than one + // cycle's worth of micro-ops. + // + // TODO: Also check if this SU must end a dispatch group. + unsigned NextCycle = CurrCycle; + if (CurrMOps >= SchedModel->getIssueWidth()) { + ++NextCycle; + DEBUG(dbgs() << " *** Max MOps " << CurrMOps + << " at cycle " << CurrCycle << '\n'); + } + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); + DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); + + switch (SchedModel->getMicroOpBufferSize()) { + case 0: + assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + break; + case 1: + if (ReadyCycle > NextCycle) { + NextCycle = ReadyCycle; + DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); + } + break; + default: + // We don't currently model the OOO reorder buffer, so consider all + // scheduled MOps to be "retired". + break; + } + RetiredMOps += IncMOps; + // Update resource counts and critical resource. if (SchedModel->hasInstrSchedModel()) { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); - Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC); + unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); + assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); + Rem->RemIssueCount -= DecRemIssue; + if (ZoneCritResIdx) { + // Scale scheduled micro-ops for comparing with the critical resource. + unsigned ScaledMOps = + RetiredMOps * SchedModel->getMicroOpFactor(); + + // If scaled micro-ops are now more than the previous critical resource by + // a full cycle, then micro-ops issue becomes critical. + if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) + >= (int)SchedModel->getLatencyFactor()) { + ZoneCritResIdx = 0; + DEBUG(dbgs() << " *** Critical resource NumMicroOps: " + << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); + } + } for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - countResource(PI->ProcResourceIdx, PI->Cycles); + unsigned RCycle = + countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle); + if (RCycle > NextCycle) + NextCycle = RCycle; } } - if (isTop()) { - if (SU->getDepth() > ExpectedLatency) - ExpectedLatency = SU->getDepth(); + // Update ExpectedLatency and DependentLatency. + unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; + unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; + if (SU->getDepth() > TopLatency) { + TopLatency = SU->getDepth(); + DEBUG(dbgs() << " " << Available.getName() + << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); } - else { - if (SU->getHeight() > ExpectedLatency) - ExpectedLatency = SU->getHeight(); + if (SU->getHeight() > BotLatency) { + BotLatency = SU->getHeight(); + DEBUG(dbgs() << " " << Available.getName() + << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); } - - IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); - - // Check the instruction group dispatch limit. - // TODO: Check if this SU must end a dispatch group. - IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); - - // checkHazard prevents scheduling multiple instructions per cycle that exceed - // issue width. However, we commonly reach the maximum. In this case - // opportunistically bump the cycle to avoid uselessly checking everything in - // the readyQ. Furthermore, a single instruction may produce more than one - // cycle's worth of micro-ops. - if (IssueCount >= SchedModel->getIssueWidth()) { - DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n'); - bumpCycle(); + // If we stall for any reason, bump the cycle. + if (NextCycle > CurrCycle) { + bumpCycle(NextCycle); + } + else { + // After updating ZoneCritResIdx and ExpectedLatency, check if we're + // resource limited. If a stall occured, bumpCycle does this. + unsigned LFactor = SchedModel->getLatencyFactor(); + IsResourceLimited = + (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) + > (int)LFactor; } + DEBUG(dumpScheduledState()); } /// Release pending ready nodes in to the available queue. This makes them /// visible to heuristics. -void ConvergingScheduler::SchedBoundary::releasePending() { +void GenericScheduler::SchedBoundary::releasePending() { // If the available queue is empty, it is safe to reset MinReadyCycle. if (Available.empty()) MinReadyCycle = UINT_MAX; // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. + bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; for (unsigned i = 0, e = Pending.size(); i != e; ++i) { SUnit *SU = *(Pending.begin()+i); unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; @@ -1695,7 +2219,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() { if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; - if (ReadyCycle > CurrCycle) + if (!IsBuffered && ReadyCycle > CurrCycle) continue; if (checkHazard(SU)) @@ -1710,7 +2234,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() { } /// Remove SU from the ready set for this boundary. -void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) { +void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) { if (Available.isInQueue(SU)) Available.remove(Available.find(SU)); else { @@ -1722,11 +2246,11 @@ void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) { /// If this queue only has one ready candidate, return it. As a side effect, /// defer any nodes that now hit a hazard, and advance the cycle until at least /// one node is ready. If multiple instructions are ready, return NULL. -SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { +SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); - if (IssueCount > 0) { + if (CurrMOps > 0) { // Defer any ready instrs that now have a hazard. for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { if (checkHazard(*I)) { @@ -1738,9 +2262,9 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { } } for (unsigned i = 0; Available.empty(); ++i) { - assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) && "permanent hazard"); (void)i; - bumpCycle(); + bumpCycle(CurrCycle + 1); releasePending(); } if (Available.size() == 1) @@ -1748,106 +2272,33 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { return NULL; } -/// Record the candidate policy for opposite zones with different critical -/// resources. -/// -/// If the CriticalZone is latency limited, don't force a policy for the -/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed. -void ConvergingScheduler::balanceZones( - ConvergingScheduler::SchedBoundary &CriticalZone, - ConvergingScheduler::SchedCandidate &CriticalCand, - ConvergingScheduler::SchedBoundary &OppositeZone, - ConvergingScheduler::SchedCandidate &OppositeCand) { - - if (!CriticalZone.IsResourceLimited) - return; - assert(SchedModel->hasInstrSchedModel() && "required schedmodel"); - - SchedRemainder *Rem = CriticalZone.Rem; - - // If the critical zone is overconsuming a resource relative to the - // remainder, try to reduce it. - unsigned RemainingCritCount = - Rem->RemainingCounts[CriticalZone.CritResIdx]; - if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount) - > (int)SchedModel->getLatencyFactor()) { - CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << " Balance " << CriticalZone.Available.getName() - << " reduce " - << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name - << '\n'); - } - // If the other zone is underconsuming a resource relative to the full zone, - // try to increase it. - unsigned OppositeCount = - OppositeZone.ResourceCounts[CriticalZone.CritResIdx]; - if ((int)(OppositeZone.ExpectedCount - OppositeCount) - > (int)SchedModel->getLatencyFactor()) { - OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName() - << " demand " - << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name - << '\n'); - } -} - -/// Determine if the scheduled zones exceed resource limits or critical path and -/// set each candidate's ReduceHeight policy accordingly. -void ConvergingScheduler::checkResourceLimits( - ConvergingScheduler::SchedCandidate &TopCand, - ConvergingScheduler::SchedCandidate &BotCand) { - - // Set ReduceLatency to true if needed. - Bot.setLatencyPolicy(BotCand.Policy); - Top.setLatencyPolicy(TopCand.Policy); - - // Handle resource-limited regions. - if (Top.IsResourceLimited && Bot.IsResourceLimited - && Top.CritResIdx == Bot.CritResIdx) { - // If the scheduled critical resource in both zones is no longer the - // critical remaining resource, attempt to reduce resource height both ways. - if (Top.CritResIdx != Rem.CritResIdx) { - TopCand.Policy.ReduceResIdx = Top.CritResIdx; - BotCand.Policy.ReduceResIdx = Bot.CritResIdx; - DEBUG(dbgs() << " Reduce scheduled " - << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n'); - } - return; - } - // Handle latency-limited regions. - if (!Top.IsResourceLimited && !Bot.IsResourceLimited) { - // If the total scheduled expected latency exceeds the region's critical - // path then reduce latency both ways. - // - // Just because a zone is not resource limited does not mean it is latency - // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle - // to exceed expected latency. - if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath) - && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) { - TopCand.Policy.ReduceLatency = true; - BotCand.Policy.ReduceLatency = true; - DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency - << " + " << Bot.ExpectedLatency << '\n'); - } - return; +#ifndef NDEBUG +// This is useful information to dump after bumpNode. +// Note that the Queue contents are more useful before pickNodeFromQueue. +void GenericScheduler::SchedBoundary::dumpScheduledState() { + unsigned ResFactor; + unsigned ResCount; + if (ZoneCritResIdx) { + ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); + ResCount = getResourceCount(ZoneCritResIdx); } - // The critical resource is different in each zone, so request balancing. - - // Compute the cost of each zone. - Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle); - Top.ExpectedCount = std::max( - Top.getCriticalCount(), - Top.ExpectedCount * SchedModel->getLatencyFactor()); - Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle); - Bot.ExpectedCount = std::max( - Bot.getCriticalCount(), - Bot.ExpectedCount * SchedModel->getLatencyFactor()); - - balanceZones(Top, TopCand, Bot, BotCand); - balanceZones(Bot, BotCand, Top, TopCand); + else { + ResFactor = SchedModel->getMicroOpFactor(); + ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); + } + unsigned LFactor = SchedModel->getLatencyFactor(); + dbgs() << Available.getName() << " @" << CurrCycle << "c\n" + << " Retired: " << RetiredMOps; + dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; + dbgs() << "\n Critical: " << ResCount / LFactor << "c, " + << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx) + << "\n ExpectedLatency: " << ExpectedLatency << "c\n" + << (IsResourceLimited ? " - Resource" : " - Latency") + << " limited.\n"; } +#endif -void ConvergingScheduler::SchedCandidate:: +void GenericScheduler::SchedCandidate:: initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { if (!Policy.ReduceResIdx && !Policy.DemandResIdx) @@ -1864,11 +2315,12 @@ initResourceDelta(const ScheduleDAGMI *DAG, } } + /// Return true if this heuristic determines order. static bool tryLess(int TryVal, int CandVal, - ConvergingScheduler::SchedCandidate &TryCand, - ConvergingScheduler::SchedCandidate &Cand, - ConvergingScheduler::CandReason Reason) { + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { if (TryVal < CandVal) { TryCand.Reason = Reason; return true; @@ -1878,13 +2330,14 @@ static bool tryLess(int TryVal, int CandVal, Cand.Reason = Reason; return true; } + Cand.setRepeat(Reason); return false; } static bool tryGreater(int TryVal, int CandVal, - ConvergingScheduler::SchedCandidate &TryCand, - ConvergingScheduler::SchedCandidate &Cand, - ConvergingScheduler::CandReason Reason) { + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { if (TryVal > CandVal) { TryCand.Reason = Reason; return true; @@ -1894,9 +2347,34 @@ static bool tryGreater(int TryVal, int CandVal, Cand.Reason = Reason; return true; } + Cand.setRepeat(Reason); return false; } +static bool tryPressure(const PressureChange &TryP, + const PressureChange &CandP, + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { + int TryRank = TryP.getPSetOrMax(); + int CandRank = CandP.getPSetOrMax(); + // If both candidates affect the same set, go with the smallest increase. + if (TryRank == CandRank) { + return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, + Reason); + } + // If one candidate decreases and the other increases, go with it. + // Invalid candidates have UnitInc==0. + if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, + Reason)) { + return true; + } + // If the candidates are decreasing pressure, reverse priority. + if (TryP.getUnitInc() < 0) + std::swap(TryRank, CandRank); + return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); +} + static unsigned getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } @@ -1929,6 +2407,32 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) { return 0; } +static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::SchedBoundary &Zone) { + if (Zone.isTop()) { + if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericScheduler::TopDepthReduce)) + return true; + } + if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericScheduler::TopPathReduce)) + return true; + } + else { + if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericScheduler::BotHeightReduce)) + return true; + } + if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericScheduler::BotPathReduce)) + return true; + } + return false; +} + /// Apply a set of heursitics to a new candidate. Heuristics are currently /// hierarchical. This may be more efficient than a graduated cost model because /// we don't need to evaluate all aspects of the model for each node in the @@ -1940,16 +2444,44 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) { /// \param Zone describes the scheduled zone that we are extending. /// \param RPTracker describes reg pressure within the scheduled zone. /// \param TempTracker is a scratch pressure tracker to reuse in queries. -void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, +void GenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary &Zone, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker) { - // Always initialize TryCand's RPDelta. - TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta, - DAG->getRegionCriticalPSets(), - DAG->getRegPressure().MaxSetPressure); + if (DAG->isTrackingPressure()) { + // Always initialize TryCand's RPDelta. + if (Zone.isTop()) { + TempTracker.getMaxDownwardPressureDelta( + TryCand.SU->getInstr(), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + else { + if (VerifyScheduling) { + TempTracker.getMaxUpwardPressureDelta( + TryCand.SU->getInstr(), + &DAG->getPressureDiff(TryCand.SU), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + else { + RPTracker.getUpwardPressureDelta( + TryCand.SU->getInstr(), + DAG->getPressureDiff(TryCand.SU), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + } + } + DEBUG(if (TryCand.RPDelta.Excess.isValid()) + dbgs() << " SU(" << TryCand.SU->NodeNum << ") " + << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet()) + << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n"); // Initialize the candidate if needed. if (!Cand.isValid()) { @@ -1962,20 +2494,25 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, TryCand, Cand, PhysRegCopy)) return; - // Avoid exceeding the target's limit. - if (tryLess(TryCand.RPDelta.Excess.UnitIncrease, - Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess)) + // Avoid exceeding the target's limit. If signed PSetID is negative, it is + // invalid; convert it to INT_MAX to give it lowest priority. + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, + Cand.RPDelta.Excess, + TryCand, Cand, RegExcess)) return; - if (Cand.Reason == SingleExcess) - Cand.Reason = MultiPressure; // Avoid increasing the max critical pressure in the scheduled region. - if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease, - Cand.RPDelta.CriticalMax.UnitIncrease, - TryCand, Cand, SingleCritical)) + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, + Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical)) + return; + + // For loops that are acyclic path limited, aggressively schedule for latency. + // This can result in very long dependence chains scheduled in sequence, so + // once every cycle (when CurrMOps == 0), switch to normal heuristics. + if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps + && tryLatency(TryCand, Cand, Zone)) return; - if (Cand.Reason == SingleCritical) - Cand.Reason = MultiPressure; // Keep clustered nodes together to encourage downstream peephole // optimizations which may reduce resource requirements. @@ -1990,17 +2527,17 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; // Weak edges are for clustering and other constraints. - // - // Deferring TryCand here does not change Cand's reason. This is good in the - // sense that a bad candidate shouldn't affect a previous candidate's - // goodness, but bad in that it is assymetric and depends on queue order. - CandReason OrigReason = Cand.Reason; if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), getWeakLeft(Cand.SU, Zone.isTop()), TryCand, Cand, Weak)) { - Cand.Reason = OrigReason; return; } + // Avoid increasing the max pressure of the entire region. + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, + Cand.RPDelta.CurrentMax, + TryCand, Cand, RegMax)) + return; + // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, @@ -2012,41 +2549,15 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; // Avoid serializing long latency dependence chains. - if (Cand.Policy.ReduceLatency) { - if (Zone.isTop()) { - if (Cand.SU->getDepth() * SchedModel->getLatencyFactor() - > Zone.ExpectedCount) { - if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, TopDepthReduce)) - return; - } - if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, TopPathReduce)) - return; - } - else { - if (Cand.SU->getHeight() * SchedModel->getLatencyFactor() - > Zone.ExpectedCount) { - if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, BotHeightReduce)) - return; - } - if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, BotPathReduce)) - return; - } - } - - // Avoid increasing the max pressure of the entire region. - if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease, - Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax)) + // For acyclic path limited loops, latency was already checked above. + if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited + && tryLatency(TryCand, Cand, Zone)) { return; - if (Cand.Reason == SingleMax) - Cand.Reason = MultiPressure; + } // Prefer immediate defs/users of the last scheduled instruction. This is a - // nice pressure avoidance strategy that also conserves the processor's - // register renaming resources and keeps the machine code readable. + // local pressure avoidance strategy that also makes the machine code + // readable. if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), TryCand, Cand, NextDefUse)) return; @@ -2058,49 +2569,17 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, } } -/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is -/// more desirable than RHS from scheduling standpoint. -static bool compareRPDelta(const RegPressureDelta &LHS, - const RegPressureDelta &RHS) { - // Compare each component of pressure in decreasing order of importance - // without checking if any are valid. Invalid PressureElements are assumed to - // have UnitIncrease==0, so are neutral. - - // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { - DEBUG(dbgs() << " RP excess top - bot: " - << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); - return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; - } - // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { - DEBUG(dbgs() << " RP critical top - bot: " - << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) - << '\n'); - return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; - } - // Avoid increasing the max pressure of the entire region. - if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { - DEBUG(dbgs() << " RP current top - bot: " - << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) - << '\n'); - return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; - } - return false; -} - #ifndef NDEBUG -const char *ConvergingScheduler::getReasonStr( - ConvergingScheduler::CandReason Reason) { +const char *GenericScheduler::getReasonStr( + GenericScheduler::CandReason Reason) { switch (Reason) { case NoCand: return "NOCAND "; case PhysRegCopy: return "PREG-COPY"; - case SingleExcess: return "REG-EXCESS"; - case SingleCritical: return "REG-CRIT "; + case RegExcess: return "REG-EXCESS"; + case RegCritical: return "REG-CRIT "; case Cluster: return "CLUSTER "; case Weak: return "WEAK "; - case SingleMax: return "REG-MAX "; - case MultiPressure: return "REG-MULTI "; + case RegMax: return "REG-MAX "; case ResourceReduce: return "RES-REDUCE"; case ResourceDemand: return "RES-DEMAND"; case TopDepthReduce: return "TOP-DEPTH "; @@ -2113,20 +2592,20 @@ const char *ConvergingScheduler::getReasonStr( llvm_unreachable("Unknown reason!"); } -void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { - PressureElement P; +void GenericScheduler::traceCandidate(const SchedCandidate &Cand) { + PressureChange P; unsigned ResIdx = 0; unsigned Latency = 0; switch (Cand.Reason) { default: break; - case SingleExcess: + case RegExcess: P = Cand.RPDelta.Excess; break; - case SingleCritical: + case RegCritical: P = Cand.RPDelta.CriticalMax; break; - case SingleMax: + case RegMax: P = Cand.RPDelta.CurrentMax; break; case ResourceReduce: @@ -2150,8 +2629,8 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { } dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); if (P.isValid()) - dbgs() << " " << TRI->getRegPressureSetName(P.PSetID) - << ":" << P.UnitIncrease << " "; + dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) + << ":" << P.getUnitInc() << " "; else dbgs() << " "; if (ResIdx) @@ -2166,12 +2645,12 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { } #endif -/// Pick the best candidate from the top queue. +/// Pick the best candidate from the queue. /// /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during /// DAG building. To adjust for the current scheduling location we need to /// maintain the number of vreg uses remaining to be top-scheduled. -void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, +void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, const RegPressureTracker &RPTracker, SchedCandidate &Cand) { ReadyQueue &Q = Zone.Available; @@ -2196,30 +2675,31 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, } } -static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, +static void tracePick(const GenericScheduler::SchedCandidate &Cand, bool IsTop) { DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") - << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n'); + << GenericScheduler::getReasonStr(Cand.Reason) << '\n'); } /// Pick the best candidate node from either the top or bottom queue. -SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { +SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. This is most // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - DEBUG(dbgs() << "Pick Top NOCAND\n"); + DEBUG(dbgs() << "Pick Bot NOCAND\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - DEBUG(dbgs() << "Pick Bot NOCAND\n"); + DEBUG(dbgs() << "Pick Top NOCAND\n"); return SU; } CandPolicy NoPolicy; SchedCandidate BotCand(NoPolicy); SchedCandidate TopCand(NoPolicy); - checkResourceLimits(TopCand, BotCand); + Bot.setPolicy(BotCand.Policy, Top); + Top.setPolicy(TopCand.Policy, Bot); // Prefer bottom scheduling when heuristics are silent. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); @@ -2232,7 +2712,10 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // affects picking from either Q. If scheduling in one direction must // increase pressure for one of the excess PSets, then schedule in that // direction first to provide more freedom in the other direction. - if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) { + if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess)) + || (BotCand.Reason == RegCritical + && !BotCand.isRepeat(RegCritical))) + { IsTopNode = false; tracePick(BotCand, IsTopNode); return BotCand.SU; @@ -2241,37 +2724,20 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); assert(TopCand.Reason != NoCand && "failed to find the first candidate"); - // If either Q has a single candidate that minimizes pressure above the - // original region's pressure pick it. - if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) { - if (TopCand.Reason < BotCand.Reason) { - IsTopNode = true; - tracePick(TopCand, IsTopNode); - return TopCand.SU; - } - IsTopNode = false; - tracePick(BotCand, IsTopNode); - return BotCand.SU; - } - // Check for a salient pressure difference and pick the best from either side. - if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) { - IsTopNode = true; - tracePick(TopCand, IsTopNode); - return TopCand.SU; - } - // Otherwise prefer the bottom candidate, in node order if all else failed. + // Choose the queue with the most important (lowest enum) reason. if (TopCand.Reason < BotCand.Reason) { IsTopNode = true; tracePick(TopCand, IsTopNode); return TopCand.SU; } + // Otherwise prefer the bottom candidate, in node order if all else failed. IsTopNode = false; tracePick(BotCand, IsTopNode); return BotCand.SU; } /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. -SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { +SUnit *GenericScheduler::pickNode(bool &IsTopNode) { if (DAG->top() == DAG->bottom()) { assert(Top.Available.empty() && Top.Pending.empty() && Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); @@ -2279,24 +2745,26 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { } SUnit *SU; do { - if (ForceTopDown) { + if (RegionPolicy.OnlyTopDown) { SU = Top.pickOnlyChoice(); if (!SU) { CandPolicy NoPolicy; SchedCandidate TopCand(NoPolicy); pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); - assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + tracePick(TopCand, true); SU = TopCand.SU; } IsTopNode = true; } - else if (ForceBottomUp) { + else if (RegionPolicy.OnlyBottomUp) { SU = Bot.pickOnlyChoice(); if (!SU) { CandPolicy NoPolicy; SchedCandidate BotCand(NoPolicy); pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); - assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + assert(BotCand.Reason != NoCand && "failed to find a candidate"); + tracePick(BotCand, false); SU = BotCand.SU; } IsTopNode = false; @@ -2315,7 +2783,7 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { return SU; } -void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { +void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { MachineBasicBlock::iterator InsertPos = SU->getInstr(); if (!isTop) @@ -2346,15 +2814,15 @@ void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { /// /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling /// them here. See comments in biasPhysRegCopy. -void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { +void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { - SU->TopReadyCycle = Top.CurrCycle; + SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle); Top.bumpNode(SU); if (SU->hasPhysRegUses) reschedulePhysRegCopies(SU, true); } else { - SU->BotReadyCycle = Bot.CurrCycle; + SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle); Bot.bumpNode(SU); if (SU->hasPhysRegDefs) reschedulePhysRegCopies(SU, false); @@ -2363,26 +2831,23 @@ void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); - ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler()); +static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) { + ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C)); // Register DAG post-processors. // // FIXME: extend the mutation API to allow earlier mutations to instantiate // data and pass it to later mutations. Have a single mutation that gathers // the interesting nodes in one pass. - if (EnableCopyConstrain) - DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI)); - if (EnableLoadCluster) + DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI)); + if (EnableLoadCluster && DAG->TII->enableClusterLoads()) DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI)); if (EnableMacroFusion) DAG->addMutation(new MacroFusion(DAG->TII)); return DAG; } static MachineSchedRegistry -ConvergingSchedRegistry("converge", "Standard converging scheduler.", - createConvergingSched); +GenericSchedRegistry("converge", "Standard converging scheduler.", + createGenericSched); //===----------------------------------------------------------------------===// // ILP Scheduler. Currently for experimental analysis of heuristics. @@ -2424,15 +2889,6 @@ struct ILPOrder { /// \brief Schedule based on the ILP metric. class ILPScheduler : public MachineSchedStrategy { - /// In case all subtrees are eventually connected to a common root through - /// data dependence (e.g. reduction), place an upper limit on their size. - /// - /// FIXME: A subtree limit is generally good, but in the situation commented - /// above, where multiple similar subtrees feed a common root, we should - /// only split at a point where the resulting subtrees will be balanced. - /// (a motivating test case must be found). - static const unsigned SubtreeLimit = 16; - ScheduleDAGMI *DAG; ILPOrder Cmp; @@ -2616,7 +3072,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { } static bool isNodeHidden(const SUnit *Node) { - return (Node->NumPreds > 10 || Node->NumSuccs > 10); + return (Node->Preds.size() > 10 || Node->Succs.size() > 10); } static bool hasNodeAddressLabel(const SUnit *Node, @@ -2639,7 +3095,11 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { std::string Str; raw_string_ostream SS(Str); - SS << "SU(" << SU->NodeNum << ')'; + const SchedDFSResult *DFS = + static_cast<const ScheduleDAGMI*>(G)->getDFSResult(); + SS << "SU:" << SU->NodeNum; + if (DFS) + SS << " I:" << DFS->getNumInstrs(SU); return SS.str(); } static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 4dafbe5..105d7c2 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -308,12 +308,29 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, // to be sunk then it's probably worth it. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; + if (!MO.isReg() || !MO.isUse()) + continue; unsigned Reg = MO.getReg(); - if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Reg == 0) continue; - if (MRI->hasOneNonDBGUse(Reg)) - return true; + + // We don't move live definitions of physical registers, + // so sinking their uses won't enable any opportunities. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + // If this instruction is the only user of a virtual register, + // check if breaking the edge will enable sinking + // both this instruction and the defining instruction. + if (MRI->hasOneNonDBGUse(Reg)) { + // If the definition resides in same MBB, + // claim it's likely we can sink these together. + // If definition resides elsewhere, we aren't + // blocking it from being sunk so don't break the edge. + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (DefMI->getParent() == MI->getParent()) + return true; + } } return false; @@ -394,7 +411,7 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) { /// collectDebgValues - Scan instructions following MI and collect any /// matching DBG_VALUEs. static void collectDebugValues(MachineInstr *MI, - SmallVector<MachineInstr *, 2> & DbgValues) { + SmallVectorImpl<MachineInstr *> &DbgValues) { DbgValues.clear(); if (!MI->getOperand(0).isReg()) return; @@ -537,8 +554,8 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // We give successors with smaller loop depth higher priority. SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); - for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(), - E = Succs.end(); SI != E; ++SI) { + for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(), + E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, @@ -615,9 +632,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo); - // If the block has multiple predecessors, this would introduce computation on - // a path that it doesn't already exist. We could split the critical edge, - // but for now we just punt. + // If the block has multiple predecessors, this is a critical edge. + // Decide if we can sink along it or need to break the edge. if (SuccToSinkTo->pred_size() > 1) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. @@ -697,7 +713,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { ++MachineBasicBlock::iterator(MI)); // Move debug values. - for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(), + for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { MachineInstr *DbgMI = *DBI; SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI, diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 00f702c..6aa3f67 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -853,8 +853,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) DepCycle += MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp, - /* FindMin = */ false); + .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp); Cycle = std::max(Cycle, DepCycle); } // Remember the instruction depth. @@ -902,8 +901,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, // We may not know the UseMI of this dependency, if it came from the // live-in list. SchedModel can handle a NULL UseMI. DepHeight += SchedModel - .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op, - /* FindMin = */ false); + .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op); } Height = std::max(Height, DepHeight); // This regunit is dead above MI. @@ -941,7 +939,7 @@ static bool pushDepHeight(const DataDep &Dep, // Adjust height by Dep.DefMI latency. if (!Dep.DefMI->isTransient()) UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp, false); + UseMI, Dep.UseOp); // Update Heights[DefMI] to be the maximum height seen. MIHeightMap::iterator I; @@ -1171,7 +1169,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) DepCycle += TE.MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false); + .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp); return DepCycle; } diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index 037043f..d61470c 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -212,6 +213,10 @@ namespace { const LiveInterval &LI); void report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI); + void report(const char *msg, const MachineFunction *MF, + const LiveRange &LR); + void report(const char *msg, const MachineBasicBlock *MBB, + const LiveRange &LR); void verifyInlineAsm(const MachineInstr *MI); @@ -224,9 +229,12 @@ namespace { void verifyLiveVariables(); void verifyLiveIntervals(); void verifyLiveInterval(const LiveInterval&); - void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); - void verifyLiveIntervalSegment(const LiveInterval&, - LiveInterval::const_iterator); + void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned); + void verifyLiveRangeSegment(const LiveRange&, + const LiveRange::const_iterator I, unsigned); + void verifyLiveRange(const LiveRange&, unsigned); + + void verifyStackFrame(); }; struct MachineVerifierPass : public MachineFunctionPass { @@ -268,8 +276,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { raw_ostream *OutFile = 0; if (OutFileName) { std::string ErrorInfo; - OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, - raw_fd_ostream::F_Append); + OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, sys::fs::F_Append); if (!ErrorInfo.empty()) { errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; exit(1); @@ -412,23 +419,25 @@ void MachineVerifier::report(const char *msg, void MachineVerifier::report(const char *msg, const MachineFunction *MF, const LiveInterval &LI) { report(msg, MF); - *OS << "- interval: "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - *OS << PrintReg(LI.reg, TRI); - else - *OS << PrintRegUnit(LI.reg, TRI); - *OS << ' ' << LI << '\n'; + *OS << "- interval: " << LI << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI) { report(msg, MBB); - *OS << "- interval: "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - *OS << PrintReg(LI.reg, TRI); - else - *OS << PrintRegUnit(LI.reg, TRI); - *OS << ' ' << LI << '\n'; + *OS << "- interval: " << LI << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, + const LiveRange &LR) { + report(msg, MBB); + *OS << "- liverange: " << LR << "\n"; +} + +void MachineVerifier::report(const char *msg, const MachineFunction *MF, + const LiveRange &LR) { + report(msg, MF); + *OS << "- liverange: " << LR << "\n"; } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -475,6 +484,8 @@ void MachineVerifier::visitMachineFunctionBefore() { // Check that the register use lists are sane. MRI->verifyUseLists(); + + verifyStackFrame(); } // Does iterator point to a and b as the first two elements? @@ -669,8 +680,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB live-in list contains non-physical register", MBB); continue; } - regsLive.insert(*I); - for (MCSubRegIterator SubRegs(*I, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(*I, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) regsLive.insert(*SubRegs); } regsLiveInButUnused = regsLive; @@ -679,8 +690,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { assert(MFI && "Function has no frame info"); BitVector PR = MFI->getPristineRegs(MBB); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { - regsLive.insert(I); - for (MCSubRegIterator SubRegs(I, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) regsLive.insert(*SubRegs); } @@ -764,7 +775,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); *OS << MCID.getNumOperands() << " operands expected, but " - << MI->getNumExplicitOperands() << " given.\n"; + << MI->getNumOperands() << " given.\n"; } // Check the tied operands. @@ -822,7 +833,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MO->isReg() && !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { if (MO->isDef() && !MCOI.isOptionalDef()) - report("Explicit operand marked as def", MO, MONum); + report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } @@ -997,16 +1008,16 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Check the cached regunit intervals. if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { - if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) { - LiveRangeQuery LRQ(*LI, UseIdx); + if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units)) { + LiveQueryResult LRQ = LR->Query(UseIdx); if (!LRQ.valueIn()) { - report("No live range at use", MO, MONum); + report("No live segment at use", MO, MONum); *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) - << ' ' << *LI << '\n'; + << ' ' << *LR << '\n'; } if (MO->isKill() && !LRQ.isKill()) { report("Live range continues after kill flag", MO, MONum); - *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n'; + *OS << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; } } } @@ -1016,9 +1027,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveInts->hasInterval(Reg)) { // This is a virtual register interval. const LiveInterval &LI = LiveInts->getInterval(Reg); - LiveRangeQuery LRQ(LI, UseIdx); + LiveQueryResult LRQ = LI.Query(UseIdx); if (!LRQ.valueIn()) { - report("No live range at use", MO, MONum); + report("No live segment at use", MO, MONum); *OS << UseIdx << " is not live in " << LI << '\n'; } // Check for extra kill flags. @@ -1067,7 +1078,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) report("Multiple virtual register defs in SSA form", MO, MONum); - // Check LiveInts for a live range, but only for virtual registers. + // Check LiveInts for a live segment, but only for virtual registers. if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && !LiveInts->isNotInMIMap(MI)) { SlotIndex DefIdx = LiveInts->getInstructionIndex(MI); @@ -1082,9 +1093,17 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { << DefIdx << " in " << LI << '\n'; } } else { - report("No live range at def", MO, MONum); + report("No live segment at def", MO, MONum); *OS << DefIdx << " is not live in " << LI << '\n'; } + // Check that, if the dead def flag is present, LiveInts agree. + if (MO->isDead()) { + LiveQueryResult LRQ = LI.Query(DefIdx); + if (!LRQ.isDeadDef()) { + report("Live range continues after dead def flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } } else { report("Virtual register has no Live interval", MO, MONum); } @@ -1331,25 +1350,26 @@ void MachineVerifier::verifyLiveIntervals() { // Verify all the cached regunit intervals. for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) - if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i)) - verifyLiveInterval(*LI); + if (const LiveRange *LR = LiveInts->getCachedRegUnit(i)) + verifyLiveRange(*LR, i); } -void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, - VNInfo *VNI) { +void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, + const VNInfo *VNI, + unsigned Reg) { if (VNI->isUnused()) return; - const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); + const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def); if (!DefVNI) { - report("Valno not live at def and not marked unused", MF, LI); + report("Valno not live at def and not marked unused", MF, LR); *OS << "Valno #" << VNI->id << '\n'; return; } if (DefVNI != VNI) { - report("Live range at def has different valno", MF, LI); + report("Live segment at def has different valno", MF, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " where valno #" << DefVNI->id << " is live\n"; return; @@ -1357,15 +1377,15 @@ void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); if (!MBB) { - report("Invalid definition index", MF, LI); + report("Invalid definition index", MF, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; + << " in " << LR << '\n'; return; } if (VNI->isPHIDef()) { if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MBB, LI); + report("PHIDef value is not defined at MBB start", MBB, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; } @@ -1375,161 +1395,154 @@ void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, // Non-PHI def. const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); if (!MI) { - report("No instruction at def index", MBB, LI); + report("No instruction at def index", MBB, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; return; } - bool hasDef = false; - bool isEarlyClobber = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - if (MOI->getReg() != LI.reg) - continue; - } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || - !TRI->hasRegUnit(MOI->getReg(), LI.reg)) + if (Reg != 0) { + bool hasDef = false; + bool isEarlyClobber = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (MOI->getReg() != Reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->hasRegUnit(MOI->getReg(), Reg)) + continue; + } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; } - hasDef = true; - if (MOI->isEarlyClobber()) - isEarlyClobber = true; - } - if (!hasDef) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LR << '\n'; + } - // Early clobber defs begin at USE slots, but other defs must begin at - // DEF slots. - if (isEarlyClobber) { - if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MBB, LI); + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. + if (isEarlyClobber) { + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MBB, LR); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MBB, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } - } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MBB, LI); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } -void -MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, - LiveInterval::const_iterator I) { - const VNInfo *VNI = I->valno; - assert(VNI && "Live range has no valno"); - - if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { - report("Foreign valno in live range", MF, LI); - *OS << *I << " has a bad valno\n"; +void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, + const LiveRange::const_iterator I, + unsigned Reg) { + const LiveRange::Segment &S = *I; + const VNInfo *VNI = S.valno; + assert(VNI && "Live segment has no valno"); + + if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { + report("Foreign valno in live segment", MF, LR); + *OS << S << " has a bad valno\n"; } if (VNI->isUnused()) { - report("Live range valno is marked unused", MF, LI); - *OS << *I << '\n'; + report("Live segment valno is marked unused", MF, LR); + *OS << S << '\n'; } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); if (!MBB) { - report("Bad start of live segment, no basic block", MF, LI); - *OS << *I << '\n'; + report("Bad start of live segment, no basic block", MF, LR); + *OS << S << '\n'; return; } SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); - if (I->start != MBBStartIdx && I->start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB, LI); - *OS << *I << '\n'; + if (S.start != MBBStartIdx && S.start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB, LR); + *OS << S << '\n'; } const MachineBasicBlock *EndMBB = - LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + LiveInts->getMBBFromIndex(S.end.getPrevSlot()); if (!EndMBB) { - report("Bad end of live segment, no basic block", MF, LI); - *OS << *I << '\n'; + report("Bad end of live segment, no basic block", MF, LR); + *OS << S << '\n'; return; } // No more checks for live-out segments. - if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + if (S.end == LiveInts->getMBBEndIdx(EndMBB)) return; // RegUnit intervals are allowed dead phis. - if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() && - I->start == VNI->def && I->end == VNI->def.getDeadSlot()) + if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() && + S.start == VNI->def && S.end == VNI->def.getDeadSlot()) return; // The live segment is ending inside EndMBB const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + LiveInts->getInstructionFromIndex(S.end.getPrevSlot()); if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB, LI); - *OS << *I << '\n'; + report("Live segment doesn't end at a valid instruction", EndMBB, LR); + *OS << S << '\n'; return; } // The block slot must refer to a basic block boundary. - if (I->end.isBlock()) { - report("Live segment ends at B slot of an instruction", EndMBB, LI); - *OS << *I << '\n'; + if (S.end.isBlock()) { + report("Live segment ends at B slot of an instruction", EndMBB, LR); + *OS << S << '\n'; } - if (I->end.isDead()) { + if (S.end.isDead()) { // Segment ends on the dead slot. // That means there must be a dead def. - if (!SlotIndex::isSameInstr(I->start, I->end)) { - report("Live segment ending at dead slot spans instructions", EndMBB, LI); - *OS << *I << '\n'; + if (!SlotIndex::isSameInstr(S.start, S.end)) { + report("Live segment ending at dead slot spans instructions", EndMBB, LR); + *OS << S << '\n'; } } // A live segment can only end at an early-clobber slot if it is being // redefined by an early-clobber def. - if (I->end.isEarlyClobber()) { - if (I+1 == LI.end() || (I+1)->start != I->end) { + if (S.end.isEarlyClobber()) { + if (I+1 == LR.end() || (I+1)->start != S.end) { report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", EndMBB, LI); - *OS << *I << '\n'; + "redefined by an EC def in the same instruction", EndMBB, LR); + *OS << S << '\n'; } } // The following checks only apply to virtual registers. Physreg liveness // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // A live segment can end with either a redefinition, a kill flag on a // use, or a dead flag on a def. bool hasRead = false; - bool hasDeadDef = false; for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || MOI->getReg() != LI.reg) + if (!MOI->isReg() || MOI->getReg() != Reg) continue; if (MOI->readsReg()) hasRead = true; - if (MOI->isDef() && MOI->isDead()) - hasDeadDef = true; } - - if (I->end.isDead()) { - if (!hasDeadDef) { - report("Instruction doesn't have a dead def operand", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } else { + if (!S.end.isDead()) { if (!hasRead) { - report("Instruction ending live range doesn't read the register", MI); - *OS << *I << " in " << LI << '\n'; + report("Instruction ending live segment doesn't read the register", MI); + *OS << S << " in " << LR << '\n'; } } } // Now check all the basic blocks in this live segment. MachineFunction::const_iterator MFI = MBB; - // Is this live range the beginning of a non-PHIDef VN? - if (I->start == VNI->def && !VNI->isPHIDef()) { + // Is this live segment the beginning of a non-PHIDef VN? + if (S.start == VNI->def && !VNI->isPHIDef()) { // Not live-in to any blocks. if (MBB == EndMBB) return; @@ -1537,9 +1550,9 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, ++MFI; } for (;;) { - assert(LiveInts->isLiveInToMBB(LI, MFI)); + assert(LiveInts->isLiveInToMBB(LR, MFI)); // We don't know how to track physregs into a landing pad. - if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && + if (!TargetRegisterInfo::isVirtualRegister(Reg) && MFI->isLandingPad()) { if (&*MFI == EndMBB) break; @@ -1555,11 +1568,11 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), PE = MFI->pred_end(); PI != PE; ++PI) { SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); - const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); + const VNInfo *PVNI = LR.getVNInfoBefore(PEnd); // All predecessors must have a live-out value. if (!PVNI) { - report("Register not marked live out of predecessor", *PI, LI); + report("Register not marked live out of predecessor", *PI, LR); *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << '\n'; @@ -1568,7 +1581,7 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, // Only PHI-defs can take different predecessor values. if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI, LI); + report("Different value live out of predecessor", *PI, LR); *OS << "Valno #" << PVNI->id << " live out of BB#" << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() @@ -1581,13 +1594,17 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, } } -void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); - I!=E; ++I) - verifyLiveIntervalValue(LI, *I); +void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg) { + for (LiveRange::const_vni_iterator I = LR.vni_begin(), E = LR.vni_end(); + I != E; ++I) + verifyLiveRangeValue(LR, *I, Reg); + + for (LiveRange::const_iterator I = LR.begin(), E = LR.end(); I != E; ++I) + verifyLiveRangeSegment(LR, I, Reg); +} - for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) - verifyLiveIntervalSegment(LI, I); +void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { + verifyLiveRange(LI, LI.reg); // Check the LI only has one connected component. if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { @@ -1606,3 +1623,130 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { } } } + +namespace { + // FrameSetup and FrameDestroy can have zero adjustment, so using a single + // integer, we can't tell whether it is a FrameSetup or FrameDestroy if the + // value is zero. + // We use a bool plus an integer to capture the stack state. + struct StackStateOfBB { + StackStateOfBB() : EntryValue(0), ExitValue(0), EntryIsSetup(false), + ExitIsSetup(false) { } + StackStateOfBB(int EntryVal, int ExitVal, bool EntrySetup, bool ExitSetup) : + EntryValue(EntryVal), ExitValue(ExitVal), EntryIsSetup(EntrySetup), + ExitIsSetup(ExitSetup) { } + // Can be negative, which means we are setting up a frame. + int EntryValue; + int ExitValue; + bool EntryIsSetup; + bool ExitIsSetup; + }; +} + +/// Make sure on every path through the CFG, a FrameSetup <n> is always followed +/// by a FrameDestroy <n>, stack adjustments are identical on all +/// CFG edges to a merge point, and frame is destroyed at end of a return block. +void MachineVerifier::verifyStackFrame() { + int FrameSetupOpcode = TII->getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + + SmallVector<StackStateOfBB, 8> SPState; + SPState.resize(MF->getNumBlockIDs()); + SmallPtrSet<const MachineBasicBlock*, 8> Reachable; + + // Visit the MBBs in DFS order. + for (df_ext_iterator<const MachineFunction*, + SmallPtrSet<const MachineBasicBlock*, 8> > + DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable); + DFI != DFE; ++DFI) { + const MachineBasicBlock *MBB = *DFI; + + StackStateOfBB BBState; + // Check the exit state of the DFS stack predecessor. + if (DFI.getPathLength() >= 2) { + const MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + BBState.EntryValue = SPState[StackPred->getNumber()].ExitValue; + BBState.EntryIsSetup = SPState[StackPred->getNumber()].ExitIsSetup; + BBState.ExitValue = BBState.EntryValue; + BBState.ExitIsSetup = BBState.EntryIsSetup; + } + + // Update stack state by checking contents of MBB. + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (I->getOpcode() == FrameSetupOpcode) { + // The first operand of a FrameOpcode should be i32. + int Size = I->getOperand(0).getImm(); + assert(Size >= 0 && + "Value should be non-negative in FrameSetup and FrameDestroy.\n"); + + if (BBState.ExitIsSetup) + report("FrameSetup is after another FrameSetup", I); + BBState.ExitValue -= Size; + BBState.ExitIsSetup = true; + } + + if (I->getOpcode() == FrameDestroyOpcode) { + // The first operand of a FrameOpcode should be i32. + int Size = I->getOperand(0).getImm(); + assert(Size >= 0 && + "Value should be non-negative in FrameSetup and FrameDestroy.\n"); + + if (!BBState.ExitIsSetup) + report("FrameDestroy is not after a FrameSetup", I); + int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue : + BBState.ExitValue; + if (BBState.ExitIsSetup && AbsSPAdj != Size) { + report("FrameDestroy <n> is after FrameSetup <m>", I); + *OS << "FrameDestroy <" << Size << "> is after FrameSetup <" + << AbsSPAdj << ">.\n"; + } + BBState.ExitValue += Size; + BBState.ExitIsSetup = false; + } + } + SPState[MBB->getNumber()] = BBState; + + // Make sure the exit state of any predecessor is consistent with the entry + // state. + for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), + E = MBB->pred_end(); I != E; ++I) { + if (Reachable.count(*I) && + (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || + SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { + report("The exit stack state of a predecessor is inconsistent.", MBB); + *OS << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" + << SPState[(*I)->getNumber()].ExitValue << ", " + << SPState[(*I)->getNumber()].ExitIsSetup + << "), while BB#" << MBB->getNumber() << " has entry state (" + << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; + } + } + + // Make sure the entry state of any successor is consistent with the exit + // state. + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + if (Reachable.count(*I) && + (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || + SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { + report("The entry stack state of a successor is inconsistent.", MBB); + *OS << "Successor BB#" << (*I)->getNumber() << " has entry state (" + << SPState[(*I)->getNumber()].EntryValue << ", " + << SPState[(*I)->getNumber()].EntryIsSetup + << "), while BB#" << MBB->getNumber() << " has exit state (" + << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; + } + } + + // Make sure a basic block with return ends with zero stack adjustment. + if (!MBB->empty() && MBB->back().isReturn()) { + if (BBState.ExitIsSetup) + report("A return block ends with a FrameSetup.", MBB); + if (BBState.ExitValue) + report("A return block ends with a nonzero stack adjustment.", MBB); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index 5584708..dcd9072 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -66,7 +66,7 @@ namespace { /// bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); void LowerPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt); + MachineBasicBlock::iterator LastPHIIt); /// analyzePHINodes - Gather information about the PHI nodes in /// here. In particular, we want to map the number of uses of a virtual @@ -185,10 +185,11 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, // Get an iterator to the first instruction after the last PHI node (this may // also be the end of the basic block). - MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin()); + MachineBasicBlock::iterator LastPHIIt = + prior(MBB.SkipPHIsAndLabels(MBB.begin())); while (MBB.front().isPHI()) - LowerPHINode(MBB, AfterPHIsIt); + LowerPHINode(MBB, LastPHIIt); return true; } @@ -218,8 +219,11 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, /// LowerPHINode - Lower the PHI node at the top of the specified block, /// void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt) { + MachineBasicBlock::iterator LastPHIIt) { ++NumLowered; + + MachineBasicBlock::iterator AfterPHIsIt = llvm::next(LastPHIIt); + // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); @@ -309,14 +313,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (IncomingReg) { // Add the region from the beginning of MBB to the copy instruction to // IncomingReg's live interval. - LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg); + LiveInterval &IncomingLI = LIS->createEmptyInterval(IncomingReg); VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex); if (!IncomingVNI) IncomingVNI = IncomingLI.getNextValue(MBBStartIndex, LIS->getVNInfoAllocator()); - IncomingLI.addRange(LiveRange(MBBStartIndex, - DestCopyIndex.getRegSlot(), - IncomingVNI)); + IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex, + DestCopyIndex.getRegSlot(), + IncomingVNI)); } LiveInterval &DestLI = LIS->getInterval(DestReg); @@ -328,14 +332,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // the copy instruction. VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex); assert(OrigDestVNI && "PHI destination should be live at block entry."); - DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot()); + DestLI.removeSegment(MBBStartIndex, MBBStartIndex.getDeadSlot()); DestLI.createDeadDef(DestCopyIndex.getRegSlot(), LIS->getVNInfoAllocator()); DestLI.removeValNo(OrigDestVNI); } else { // Otherwise, remove the region from the beginning of MBB to the copy // instruction from DestReg's live interval. - DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot()); + DestLI.removeSegment(MBBStartIndex, DestCopyIndex.getRegSlot()); VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); assert(DestVNI && "PHI destination should be live at its definition."); DestVNI->def = DestCopyIndex.getRegSlot(); @@ -456,7 +460,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (LIS) { if (NewSrcInstr) { LIS->InsertMachineInstrInMaps(NewSrcInstr); - LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr); + LIS->addSegmentToEndOfBlock(IncomingReg, NewSrcInstr); } if (!SrcUndef && @@ -507,8 +511,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, "Cannot find kill instruction"); SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst); - SrcLI.removeRange(LastUseIndex.getRegSlot(), - LIS->getMBBEndIdx(&opBlock)); + SrcLI.removeSegment(LastUseIndex.getRegSlot(), + LIS->getMBBEndIdx(&opBlock)); } } } diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h index 9ac47fb4..48234ae 100644 --- a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h +++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h @@ -1,4 +1,4 @@ -//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=// +//=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp index bfbc062..f4ffd03 100644 --- a/contrib/llvm/lib/CodeGen/Passes.cpp +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -58,8 +58,6 @@ OptimizeRegAlloc("optimize-regalloc", cl::Hidden, static cl::opt<cl::boolOrDefault> EnableMachineSched("enable-misched", cl::Hidden, cl::desc("Enable the machine instruction scheduling pass.")); -static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden, - cl::desc("Use strong PHI elimination.")); static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); @@ -236,7 +234,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) // Temporarily disable experimental passes. const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); - if (!ST.enableMachineScheduler()) + if (!ST.useMachineScheduler()) disablePass(&MachineSchedulerID); } @@ -300,6 +298,8 @@ void TargetPassConfig::addPass(Pass *P) { if (Started && !Stopped) PM->add(P); + else + delete P; if (StopAfter == PassID) Stopped = true; if (StartAfter == PassID) @@ -331,7 +331,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { addPass(P); // Ends the lifetime of P. // Add the passes after the pass P if there is any. - for (SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4>::iterator + for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end(); I != E; ++I) { if ((*I).first == PassID) { @@ -396,7 +396,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // removed from the parent invoke(s). This could happen when a landing // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. - addPass(createSjLjEHPreparePass(TM->getTargetLowering())); + addPass(createSjLjEHPreparePass(TM)); // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: @@ -404,7 +404,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: - addPass(createLowerInvokePass(TM->getTargetLowering())); + addPass(createLowerInvokePass(TM)); // The lower invoke pass may create unreachable code. Remove it. addPass(createUnreachableBlockEliminationPass()); @@ -416,13 +416,13 @@ void TargetPassConfig::addPassesToHandleExceptions() { /// before exception handling preparation passes. void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) - addPass(createCodeGenPreparePass(getTargetLowering())); + addPass(createCodeGenPreparePass(TM)); } /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. void TargetPassConfig::addISelPrepare() { - addPass(createStackProtectorPass(getTargetLowering())); + addPass(createStackProtectorPass(TM)); addPreISel(); @@ -673,24 +673,15 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // preferably fix the scavenger to not depend on them). addPass(&LiveVariablesID); - // Add passes that move from transformed SSA into conventional SSA. This is a - // "copy coalescing" problem. - // - if (!EnableStrongPHIElim) { - // Edge splitting is smarter with machine loop info. - addPass(&MachineLoopInfoID); - addPass(&PHIEliminationID); - } + // Edge splitting is smarter with machine loop info. + addPass(&MachineLoopInfoID); + addPass(&PHIEliminationID); // Eventually, we want to run LiveIntervals before PHI elimination. if (EarlyLiveIntervals) addPass(&LiveIntervalsID); addPass(&TwoAddressInstructionPassID); - - if (EnableStrongPHIElim) - addPass(&StrongPHIEliminationID); - addPass(&RegisterCoalescerID); // PreRA instruction scheduling. diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index a7439b5..28f2d2f 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -40,20 +40,30 @@ // If the branch instruction can use flag from "sub", then we can replace // "sub" with "subs" and eliminate the "cmp" instruction. // -// - Optimize Bitcast pairs: -// -// v1 = bitcast v0 -// v2 = bitcast v1 -// = v2 -// => -// v1 = bitcast v0 -// = v0 -// // - Optimize Loads: // // Loads that can be folded into a later instruction. A load is foldable // if it loads to virtual registers and the virtual register defined has // a single use. +// +// - Optimize Copies and Bitcast: +// +// Rewrite copies and bitcasts to avoid cross register bank copies +// when possible. +// E.g., Consider the following example, where capital and lower +// letters denote different register file: +// b = copy A <-- cross-bank copy +// C = copy b <-- cross-bank copy +// => +// b = copy A <-- cross-bank copy +// C = copy A <-- same-bank copy +// +// E.g., for bitcast: +// b = bitcast A <-- cross-bank copy +// C = bitcast b <-- cross-bank copy +// => +// b = bitcast A <-- cross-bank copy +// C = copy A <-- same-bank copy //===----------------------------------------------------------------------===// #define DEBUG_TYPE "peephole-opt" @@ -81,11 +91,11 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); STATISTIC(NumReuse, "Number of extension results reused"); -STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); STATISTIC(NumLoadFold, "Number of loads folded"); STATISTIC(NumSelects, "Number of selects optimized"); +STATISTIC(NumCopiesBitcasts, "Number of copies/bitcasts optimized"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -112,11 +122,11 @@ namespace { } private: - bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs); bool optimizeSelect(MachineInstr *MI); + bool optimizeCopyOrBitcast(MachineInstr *MI); bool isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); @@ -298,78 +308,6 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, return Changed; } -/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that -/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast -/// a value cross register classes), and the source is defined by another -/// bitcast instruction B. And if the register class of source of B matches -/// the register class of instruction A, then it is legal to replace all uses -/// of the def of A with source of B. e.g. -/// %vreg0<def> = VMOVSR %vreg1 -/// %vreg3<def> = VMOVRS %vreg0 -/// Replace all uses of vreg3 with vreg1. - -bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI, - MachineBasicBlock *MBB) { - unsigned NumDefs = MI->getDesc().getNumDefs(); - unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs; - if (NumDefs != 1) - return false; - - unsigned Def = 0; - unsigned Src = 0; - for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - if (MO.isDef()) - Def = Reg; - else if (Src) - // Multiple sources? - return false; - else - Src = Reg; - } - - assert(Def && Src && "Malformed bitcast instruction!"); - - MachineInstr *DefMI = MRI->getVRegDef(Src); - if (!DefMI || !DefMI->isBitcast()) - return false; - - unsigned SrcSrc = 0; - NumDefs = DefMI->getDesc().getNumDefs(); - NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs; - if (NumDefs != 1) - return false; - for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { - const MachineOperand &MO = DefMI->getOperand(i); - if (!MO.isReg() || MO.isDef()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - if (!MO.isDef()) { - if (SrcSrc) - // Multiple sources? - return false; - else - SrcSrc = Reg; - } - } - - if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def)) - return false; - - MRI->replaceRegWith(Def, SrcSrc); - MRI->clearKillFlags(SrcSrc); - MI->eraseFromParent(); - ++NumBitcasts; - return true; -} - /// optimizeCmpInstr - If the instruction is a compare and the previous /// instruction it's comparing against all ready sets (or could be modified to /// set) the same flag as the compare, then we can remove the comparison and use @@ -411,6 +349,150 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { return true; } +/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) +/// share the same register file. +static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, + const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) { + // Same register class. + if (DefRC == SrcRC) + return true; + + // Both operands are sub registers. Check if they share a register class. + unsigned SrcIdx, DefIdx; + if (SrcSubReg && DefSubReg) + return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, + SrcIdx, DefIdx) != NULL; + // At most one of the register is a sub register, make it Src to avoid + // duplicating the test. + if (!SrcSubReg) { + std::swap(DefSubReg, SrcSubReg); + std::swap(DefRC, SrcRC); + } + + // One of the register is a sub register, check if we can get a superclass. + if (SrcSubReg) + return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != NULL; + // Plain copy. + return TRI.getCommonSubClass(DefRC, SrcRC) != NULL; +} + +/// \brief Get the index of the definition and source for \p Copy +/// instruction. +/// \pre Copy.isCopy() or Copy.isBitcast(). +/// \return True if the Copy instruction has only one register source +/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx +/// are invalid. +static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy, + unsigned &DefIdx, unsigned &SrcIdx) { + assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type."); + if (Copy.isCopy()) { + // Copy instruction are supposed to be: Def = Src. + if (Copy.getDesc().getNumOperands() != 2) + return false; + DefIdx = 0; + SrcIdx = 1; + assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!"); + return true; + } + // Bitcast case. + // Bitcasts with more than one def are not supported. + if (Copy.getDesc().getNumDefs() != 1) + return false; + // Initialize SrcIdx to an undefined operand. + SrcIdx = Copy.getDesc().getNumOperands(); + for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) { + const MachineOperand &MO = Copy.getOperand(OpIdx); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef()) + DefIdx = OpIdx; + else if (SrcIdx != EndOpIdx) + // Multiple sources? + return false; + SrcIdx = OpIdx; + } + return true; +} + +/// \brief Optimize a copy or bitcast instruction to avoid cross +/// register bank copy. The optimization looks through a chain of +/// copies and try to find a source that has a compatible register +/// class. +/// Two register classes are considered to be compatible if they share +/// the same register bank. +/// New copies issued by this optimization are register allocator +/// friendly. This optimization does not remove any copy as it may +/// overconstraint the register allocator, but replaces some when +/// possible. +/// \pre \p MI is a Copy (MI->isCopy() is true) +/// \return True, when \p MI has been optimized. In that case, \p MI has +/// been removed from its parent. +bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { + unsigned DefIdx, SrcIdx; + if (!MI || !getCopyOrBitcastDefUseIdx(*MI, DefIdx, SrcIdx)) + return false; + + const MachineOperand &MODef = MI->getOperand(DefIdx); + assert(MODef.isReg() && "Copies must be between registers."); + unsigned Def = MODef.getReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Def)) + return false; + + const TargetRegisterClass *DefRC = MRI->getRegClass(Def); + unsigned DefSubReg = MODef.getSubReg(); + + unsigned Src; + unsigned SrcSubReg; + bool ShouldRewrite = false; + MachineInstr *Copy = MI; + const TargetRegisterInfo &TRI = *TM->getRegisterInfo(); + + // Follow the chain of copies until we reach the top or find a + // more suitable source. + do { + unsigned CopyDefIdx, CopySrcIdx; + if (!getCopyOrBitcastDefUseIdx(*Copy, CopyDefIdx, CopySrcIdx)) + break; + const MachineOperand &MO = Copy->getOperand(CopySrcIdx); + assert(MO.isReg() && "Copies must be between registers."); + Src = MO.getReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Src)) + break; + + const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); + SrcSubReg = MO.getSubReg(); + + // If this source does not incur a cross register bank copy, use it. + ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC, + SrcSubReg); + // Follow the chain of copies: get the definition of Src. + Copy = MRI->getVRegDef(Src); + } while (!ShouldRewrite && Copy && (Copy->isCopy() || Copy->isBitcast())); + + // If we did not find a more suitable source, there is nothing to optimize. + if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg()) + return false; + + // Rewrite the copy to avoid a cross register bank penalty. + unsigned NewVR = TargetRegisterInfo::isPhysicalRegister(Def) ? Def : + MRI->createVirtualRegister(DefRC); + MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVR) + .addReg(Src, 0, SrcSubReg); + NewCopy->getOperand(0).setSubReg(DefSubReg); + + MRI->replaceRegWith(Def, NewVR); + MRI->clearKillFlags(NewVR); + MI->eraseFromParent(); + ++NumCopiesBitcasts; + return true; +} + /// isLoadFoldable - Check whether MI is a candidate for folding into a later /// instruction. We only fold loads to virtual registers and the virtual /// register defined has a single use. @@ -523,7 +605,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->mayStore() || MI->isCall()) FoldAsLoadDefReg = 0; - if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) || + if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) || (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || (MI->isSelect() && optimizeSelect(MI))) { // MI is deleted. diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 53fe273..1afc1ec 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -127,6 +127,12 @@ namespace { /// The schedule. Null SUnit*'s represent noop instructions. std::vector<SUnit*> Sequence; + /// The index in BB of RegionEnd. + /// + /// This is the instruction number from the top of the current block, not + /// the SlotIndex. It is only used by the AntiDepBreaker. + unsigned EndIndex; + public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, @@ -141,11 +147,14 @@ namespace { /// void startBlock(MachineBasicBlock *BB); + // Set the index of RegionEnd within the current BB. + void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; } + /// Initialize the scheduler state for the next scheduling region. virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount); + unsigned regioninstrs); /// Notify that the scheduler has finished scheduling the current region. virtual void exitRegion(); @@ -197,7 +206,7 @@ SchedulePostRATDList::SchedulePostRATDList( TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs) : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), - LiveRegs(TRI->getNumRegs()) + LiveRegs(TRI->getNumRegs()), EndIndex(0) { const TargetMachine &TM = MF.getTarget(); const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); @@ -223,8 +232,8 @@ SchedulePostRATDList::~SchedulePostRATDList() { void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount) { - ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + unsigned regioninstrs) { + ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); Sequence.clear(); } @@ -312,20 +321,21 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { MachineInstr *MI = llvm::prior(I); + --Count; // Calls are not scheduling boundaries before register allocation, but // post-ra we don't gain anything by scheduling across calls since we // don't need to worry about register pressure. if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { - Scheduler.enterRegion(MBB, I, Current, CurrentCount); + Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count); + Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); Scheduler.EmitSchedule(); Current = MI; - CurrentCount = Count - 1; + CurrentCount = Count; Scheduler.Observe(MI, CurrentCount); } I = MI; - --Count; if (MI->isBundle()) Count -= MI->getBundleSize(); } @@ -333,6 +343,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); Scheduler.EmitSchedule(); @@ -424,9 +435,9 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; - LiveRegs.set(Reg); - // Repeat, for all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + // Repeat, for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) LiveRegs.set(*SubRegs); } } @@ -496,20 +507,19 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; - LiveRegs.reset(Reg); - - // Repeat for all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + // Repeat for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) LiveRegs.reset(*SubRegs); } // Examine all used registers and set/clear kill flag. When a // register is used multiple times we only set the kill flag on - // the first use. + // the first use. Don't set kill flags on undef operands. killedRegs.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) continue; + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if ((Reg == 0) || MRI.isReserved(Reg)) continue; @@ -548,9 +558,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if ((Reg == 0) || MRI.isReserved(Reg)) continue; - LiveRegs.set(Reg); - - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) LiveRegs.set(*SubRegs); } } diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index e4e18c3..0c5173a 100644 --- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -78,7 +78,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { unsigned Reg = MI->getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - // For virtual regiusters, mark all uses as <undef>, and convert users to + // For virtual registers, mark all uses as <undef>, and convert users to // implicit-def when possible. for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg), diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 9487cbd..b0e494f 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -14,9 +14,6 @@ // This pass must be run after register allocation. After this pass is // executed, it is illegal to construct MO_FrameIndex operands. // -// This pass provides an optional shrink wrapping variant of prolog/epilog -// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp. -// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pei" @@ -36,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -47,6 +45,11 @@ using namespace llvm; char PEI::ID = 0; char &llvm::PrologEpilogCodeInserterID = PEI::ID; +static cl::opt<unsigned> +WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1), + cl::desc("Warn for stack size bigger than the given" + " number")); + INITIALIZE_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) @@ -60,6 +63,38 @@ STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); +void PEI::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool PEI::isReturnBlock(MachineBasicBlock* MBB) { + return (MBB && !MBB->empty() && MBB->back().isReturn()); +} + +/// Compute the set of return blocks +void PEI::calculateSets(MachineFunction &Fn) { + // Sets used to compute spill, restore placement sets. + const std::vector<CalleeSavedInfo> &CSI = + Fn.getFrameInfo()->getCalleeSavedInfo(); + + // If no CSRs used, we are done. + if (CSI.empty()) + return; + + // Save refs to entry and return blocks. + EntryBlock = Fn.begin(); + for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); + MBB != E; ++MBB) + if (isReturnBlock(MBB)) + ReturnBlocks.push_back(MBB); + + return; +} + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// @@ -87,16 +122,11 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: - // - With shrink wrapping, place spills and restores to tightly - // enclose regions in the Machine CFG of the function where - // they are used. - // - Without shink wrapping (default), place all spills in the - // entry block, all restores in return blocks. - placeCSRSpillsAndRestores(Fn); + // place all spills in the entry block, all restores in return blocks. + calculateSets(Fn); // Add the code to save and restore the callee saved registers - if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (!F->hasFnAttribute(Attribute::Naked)) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function @@ -111,8 +141,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. - if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (!F->hasFnAttribute(Attribute::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references @@ -129,8 +158,15 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Clear any vregs created by virtual scavenging. Fn.getRegInfo().clearVirtRegs(); + // Warn on stack size when we exceeds the given limit. + MachineFrameInfo *MFI = Fn.getFrameInfo(); + if (WarnStackSize.getNumOccurrences() > 0 && + WarnStackSize < MFI->getStackSize()) + errs() << "warning: Stack size limit exceeded (" << MFI->getStackSize() + << ") in " << Fn.getName() << ".\n"; + delete RS; - clearAllSets(); + ReturnBlocks.clear(); return true; } @@ -208,8 +244,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { return; // In Naked functions we aren't going to save any registers. - if (F.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (F.getFunction()->hasFnAttribute(Attribute::Naked)) return; std::vector<CalleeSavedInfo> CSI; @@ -273,7 +308,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { } /// insertCSRSpillsAndRestores - Insert spill and restore code for -/// callee saved registers used in the function, handling shrink wrapping. +/// callee saved registers used in the function. /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. @@ -291,133 +326,33 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); MachineBasicBlock::iterator I; - if (!ShrinkWrapThisFunction) { - // Spill using target interface. - I = EntryBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Add the callee-saved register as live-in. - // It's killed at the spill. - EntryBlock->addLiveIn(CSI[i].getReg()); - - // Insert the spill to the stack frame. - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, - CSI[i].getFrameIdx(), RC, TRI); - } - } - - // Restore using target interface. - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { - MachineBasicBlock* MBB = ReturnBlocks[ri]; - I = MBB->end(); --I; - - // Skip over all terminator instructions, which are part of the return - // sequence. - MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->isTerminator()) - I = I2; - - bool AtStart = I == MBB->begin(); - MachineBasicBlock::iterator BeforeI = I; - if (!AtStart) - --BeforeI; - - // Restore all registers immediately before the return and any - // terminators that precede it. - if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(*MBB, I, Reg, - CSI[i].getFrameIdx(), - RC, TRI); - assert(I != MBB->begin() && - "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert - // multiple instructions. - if (AtStart) - I = MBB->begin(); - else { - I = BeforeI; - ++I; - } - } - } - } - return; - } - - // Insert spills. - std::vector<CalleeSavedInfo> blockCSI; - for (CSRegBlockMap::iterator BI = CSRSave.begin(), - BE = CSRSave.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet save = BI->second; - - if (save.empty()) - continue; - - blockCSI.clear(); - for (CSRegSet::iterator RI = save.begin(), - RE = save.end(); RI != RE; ++RI) { - blockCSI.push_back(CSI[*RI]); - } - assert(blockCSI.size() > 0 && - "Could not collect callee saved register info"); - - I = MBB->begin(); - - // When shrink wrapping, use stack slot stores/loads. - for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + // Spill using target interface. + I = EntryBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. - MBB->addLiveIn(blockCSI[i].getReg()); + EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. - unsigned Reg = blockCSI[i].getReg(); + unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*MBB, I, Reg, - true, - blockCSI[i].getFrameIdx(), + TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } - for (CSRegBlockMap::iterator BI = CSRRestore.begin(), - BE = CSRRestore.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet restore = BI->second; - - if (restore.empty()) - continue; + // Restore using target interface. + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { + MachineBasicBlock *MBB = ReturnBlocks[ri]; + I = MBB->end(); + --I; - blockCSI.clear(); - for (CSRegSet::iterator RI = restore.begin(), - RE = restore.end(); RI != RE; ++RI) { - blockCSI.push_back(CSI[*RI]); - } - assert(blockCSI.size() > 0 && - "Could not find callee saved register info"); - - // If MBB is empty and needs restores, insert at the _beginning_. - if (MBB->empty()) { - I = MBB->begin(); - } else { - I = MBB->end(); - --I; - - // Skip over all terminator instructions, which are part of the - // return sequence. - if (! I->isTerminator()) { - ++I; - } else { - MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->isTerminator()) - I = I2; - } - } + // Skip over all terminator instructions, which are part of the return + // sequence. + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->isTerminator()) + I = I2; bool AtStart = I == MBB->begin(); MachineBasicBlock::iterator BeforeI = I; @@ -426,21 +361,21 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Restore all registers immediately before the return and any // terminators that precede it. - for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { - unsigned Reg = blockCSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(*MBB, I, Reg, - blockCSI[i].getFrameIdx(), - RC, TRI); - assert(I != MBB->begin() && - "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert - // multiple instructions. - if (AtStart) - I = MBB->begin(); - else { - I = BeforeI; - ++I; + if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } } } } @@ -545,14 +480,18 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the - // frame pointer if a frame pointer is required. + // incoming stack pointer if a frame pointer is required and is closer + // to the incoming rather than the final stack pointer. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && - !RegInfo->needsStackRealignment(Fn)) { + bool EarlyScavengingSlots = (TFI.hasFP(Fn) && + TFI.isFPCloseToIncomingSP() && + RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)); + if (RS && EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); - for (SmallVector<int, 2>::iterator I = SFIs.begin(), - IE = SFIs.end(); I != IE; ++I) + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } @@ -632,12 +571,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || - !RegInfo->useFPForScavengingIndex(Fn))) { + if (RS && !EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); - for (SmallVector<int, 2>::iterator I = SFIs.begin(), - IE = SFIs.end(); I != IE; ++I) + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } @@ -712,6 +650,40 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { void PEI::replaceFrameIndices(MachineFunction &Fn) { if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + // Store SPAdj at exit of a basic block. + SmallVector<int, 8> SPState; + SPState.resize(Fn.getNumBlockIDs()); + SmallPtrSet<MachineBasicBlock*, 8> Reachable; + + // Iterate over the reachable blocks in DFS order. + for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> > + DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); + DFI != DFE; ++DFI) { + int SPAdj = 0; + // Check the exit state of the DFS stack predecessor. + if (DFI.getPathLength() >= 2) { + MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + SPAdj = SPState[StackPred->getNumber()]; + } + MachineBasicBlock *BB = *DFI; + replaceFrameIndices(BB, Fn, SPAdj); + SPState[BB->getNumber()] = SPAdj; + } + + // Handle the unreachable blocks. + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + if (Reachable.count(BB)) + // Already handled in DFS traversal. + continue; + int SPAdj = 0; + replaceFrameIndices(BB, Fn, SPAdj); + } +} + +void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj) { const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); @@ -722,89 +694,85 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); - for (MachineFunction::iterator BB = Fn.begin(), - E = Fn.end(); BB != E; ++BB) { -#ifndef NDEBUG - int SPAdjCount = 0; // frame setup / destroy count. -#endif - int SPAdj = 0; // SP offset due to call frame setup / destroy. - if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); - for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (I->getOpcode() == FrameSetupOpcode || - I->getOpcode() == FrameDestroyOpcode) { -#ifndef NDEBUG - // Track whether we see even pairs of them - SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; -#endif - // Remember how much SP has been adjusted to create the call - // frame. - int Size = I->getOperand(0).getImm(); - - if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || - (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) - Size = -Size; - - SPAdj += Size; - - MachineBasicBlock::iterator PrevI = BB->end(); - if (I != BB->begin()) PrevI = prior(I); - TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); - - // Visit the instructions created by eliminateCallFramePseudoInstr(). - if (PrevI == BB->end()) - I = BB->begin(); // The replaced instr was the first in the block. - else - I = llvm::next(PrevI); - continue; - } + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + // Remember how much SP has been adjusted to create the call + // frame. + int Size = I->getOperand(0).getImm(); - MachineInstr *MI = I; - bool DoIncr = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (!MI->getOperand(i).isFI()) - continue; + if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) + Size = -Size; - // Some instructions (e.g. inline asm instructions) can have - // multiple frame indices and/or cause eliminateFrameIndex - // to insert more than one instruction. We need the register - // scavenger to go through all of these instructions so that - // it can update its register information. We keep the - // iterator at the point before insertion so that we can - // revisit them in full. - bool AtBeginning = (I == BB->begin()); - if (!AtBeginning) --I; - - // If this instruction has a FrameIndex operand, we need to - // use that target machine register info object to eliminate - // it. - TRI.eliminateFrameIndex(MI, SPAdj, i, - FrameIndexVirtualScavenging ? NULL : RS); - - // Reset the iterator if we were at the beginning of the BB. - if (AtBeginning) { - I = BB->begin(); - DoIncr = false; - } + SPAdj += Size; + + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = prior(I); + TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); - MI = 0; - break; + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. + else + I = llvm::next(PrevI); + continue; + } + + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (!MI->getOperand(i).isFI()) + continue; + + // Frame indicies in debug values are encoded in a target independent + // way with simply the frame index and offset rather than any + // target-specific addressing mode. + if (MI->isDebugValue()) { + assert(i == 0 && "Frame indicies can only appear as the first " + "operand of a DBG_VALUE machine instruction"); + unsigned Reg; + MachineOperand &Offset = MI->getOperand(1); + Offset.setImm(Offset.getImm() + + TFI->getFrameIndexReference( + Fn, MI->getOperand(0).getIndex(), Reg)); + MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/); + continue; } - if (DoIncr && I != BB->end()) ++I; + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, i, + FrameIndexVirtualScavenging ? NULL : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; + } - // Update register states. - if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); + MI = 0; + break; } - // If we have evenly matched pairs of frame setup / destroy instructions, - // make sure the adjustments come out to zero. If we don't have matched - // pairs, we can't be sure the missing bit isn't in another basic block - // due to a custom inserter playing tricks, so just asserting SPAdj==0 - // isn't sufficient. See tMOVCC on Thumb1, for example. - assert((SPAdjCount || SPAdj == 0) && - "Unbalanced call frame setup / destroy pairs?"); + if (DoIncr && I != BB->end()) ++I; + + // Update register states. + if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } } diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h index 87fff9a..77cfa2b 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h @@ -1,4 +1,4 @@ -//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===// +//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -14,9 +14,6 @@ // This pass must be run after register allocation. After this pass is // executed, it is illegal to construct MO_FrameIndex operands. // -// This pass also implements a shrink wrapping variant of prolog/epilog -// insertion. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_PEI_H @@ -54,120 +51,28 @@ namespace llvm { // stack frame indexes. unsigned MinCSFrameIndex, MaxCSFrameIndex; - // Analysis info for spill/restore placement. - // "CSR": "callee saved register". - - // CSRegSet contains indices into the Callee Saved Register Info - // vector built by calculateCalleeSavedRegisters() and accessed - // via MF.getFrameInfo()->getCalleeSavedInfo(). - typedef SparseBitVector<> CSRegSet; - - // CSRegBlockMap maps MachineBasicBlocks to sets of callee - // saved register indices. - typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap; - - // Set and maps for computing CSR spill/restore placement: - // used in function (UsedCSRegs) - // used in a basic block (CSRUsed) - // anticipatable in a basic block (Antic{In,Out}) - // available in a basic block (Avail{In,Out}) - // to be spilled at the entry to a basic block (CSRSave) - // to be restored at the end of a basic block (CSRRestore) - CSRegSet UsedCSRegs; - CSRegBlockMap CSRUsed; - CSRegBlockMap AnticIn, AnticOut; - CSRegBlockMap AvailIn, AvailOut; - CSRegBlockMap CSRSave; - CSRegBlockMap CSRRestore; - // Entry and return blocks of the current function. MachineBasicBlock* EntryBlock; SmallVector<MachineBasicBlock*, 4> ReturnBlocks; - // Map of MBBs to top level MachineLoops. - DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops; - - // Flag to control shrink wrapping per-function: - // may choose to skip shrink wrapping for certain - // functions. - bool ShrinkWrapThisFunction; - // Flag to control whether to use the register scavenger to resolve // frame index materialization registers. Set according to // TRI->requiresFrameIndexScavenging() for the curren function. bool FrameIndexVirtualScavenging; -#ifndef NDEBUG - // Machine function handle. - MachineFunction* MF; - - // Flag indicating that the current function - // has at least one "short" path in the machine - // CFG from the entry block to an exit block. - bool HasFastExitPath; -#endif - - bool calculateSets(MachineFunction &Fn); - bool calcAnticInOut(MachineBasicBlock* MBB); - bool calcAvailInOut(MachineBasicBlock* MBB); - void calculateAnticAvail(MachineFunction &Fn); - bool addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4>& blks); - bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks); - bool calcSpillPlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, - CSRegBlockMap &prevSpills); - bool calcRestorePlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, - CSRegBlockMap &prevRestores); - void placeSpillsAndRestores(MachineFunction &Fn); - void placeCSRSpillsAndRestores(MachineFunction &Fn); + void calculateSets(MachineFunction &Fn); void calculateCallsInformation(MachineFunction &Fn); void calculateCalleeSavedRegisters(MachineFunction &Fn); void insertCSRSpillsAndRestores(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); + void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj); void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); - // Initialize DFA sets, called before iterations. - void clearAnticAvailSets(); - // Clear all sets constructed by shrink wrapping. - void clearAllSets(); - - // Initialize all shrink wrapping data. - void initShrinkWrappingInfo(); - - // Convienences for dealing with machine loops. - MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP); - MachineLoop* getTopLevelLoopParent(MachineLoop *LP); - - // Propgate CSRs used in MBB to all MBBs of loop LP. - void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP); - // Convenience for recognizing return blocks. bool isReturnBlock(MachineBasicBlock* MBB); - -#ifndef NDEBUG - // Debugging methods. - - // Mark this function as having fast exit paths. - void findFastExitPath(); - - // Verify placement of spills/restores. - void verifySpillRestorePlacement(); - - std::string getBasicBlockName(const MachineBasicBlock* MBB); - std::string stringifyCSRegSet(const CSRegSet& s); - void dumpSet(const CSRegSet& s); - void dumpUsed(MachineBasicBlock* MBB); - void dumpAllUsed(); - void dumpSets(MachineBasicBlock* MBB); - void dumpSets1(MachineBasicBlock* MBB); - void dumpAllSets(); - void dumpSRSets(); -#endif - }; } // End llvm namespace #endif diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp index c035590..293e306 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -43,13 +43,16 @@ static cl::opt<bool, true> VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), cl::desc("Verify during register allocation")); -const char *RegAllocBase::TimerGroupName = "Register Allocation"; +const char RegAllocBase::TimerGroupName[] = "Register Allocation"; bool RegAllocBase::VerifyEnabled = false; //===----------------------------------------------------------------------===// // RegAllocBase Implementation //===----------------------------------------------------------------------===// +// Pin the vtable to this file. +void RegAllocBase::anchor() {} + void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat) { @@ -99,14 +102,13 @@ void RegAllocBase::allocatePhysRegs() { // result from splitting. DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg->reg)->getName() - << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n'); - typedef SmallVector<LiveInterval*, 4> VirtRegVec; + << ':' << *VirtReg << '\n'); + typedef SmallVector<unsigned, 4> VirtRegVec; VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); if (AvailablePhysReg == ~0u) { // selectOrSplit failed to find a register! - const char *Msg = "ran out of registers during register allocation"; // Probably caused by an inline asm. MachineInstr *MI; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); @@ -114,9 +116,9 @@ void RegAllocBase::allocatePhysRegs() { if (MI->isInlineAsm()) break; if (MI) - MI->emitError(Msg); + MI->emitError("inline assembly requires more registers than available"); else - report_fatal_error(Msg); + report_fatal_error("ran out of registers during register allocation"); // Keep going after reporting the error. VRM->assignVirt2Phys(VirtReg->reg, RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); @@ -128,7 +130,7 @@ void RegAllocBase::allocatePhysRegs() { for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); I != E; ++I) { - LiveInterval *SplitVirtReg = *I; + LiveInterval *SplitVirtReg = &LIS->getInterval(*I); assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h index 064e40f..c17a8d9 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.h +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h @@ -38,7 +38,7 @@ #define LLVM_CODEGEN_REGALLOCBASE #include "llvm/ADT/OwningPtr.h" -#include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/RegisterClassInfo.h" namespace llvm { @@ -57,6 +57,7 @@ class Spiller; /// live range splitting. They must also override enqueue/dequeue to provide an /// assignment order. class RegAllocBase { + virtual void anchor(); protected: const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; @@ -90,10 +91,10 @@ protected: // or new set of split live virtual registers. It is up to the splitter to // converge quickly toward fully spilled live ranges. virtual unsigned selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<LiveInterval*> &splitLVRs) = 0; + SmallVectorImpl<unsigned> &splitLVRs) = 0; // Use this group name for NamedRegionTimer. - static const char *TimerGroupName; + static const char TimerGroupName[]; public: /// VerifyEnabled - True when -verify-regalloc is given. diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 7fcfe9e..6768e45 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -101,7 +102,7 @@ public: } virtual unsigned selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs); + SmallVectorImpl<unsigned> &SplitVRegs); /// Perform register allocation. virtual bool runOnMachineFunction(MachineFunction &mf); @@ -110,7 +111,7 @@ public: // that interfere with the most recently queried lvr. Return true if spilling // was successful, and append any new spilled/split intervals to splitLVRs. bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs); + SmallVectorImpl<unsigned> &SplitVRegs); static char ID; }; @@ -125,7 +126,6 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); @@ -142,9 +142,10 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); AU.addPreserved<LiveDebugVariables>(); - AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addRequiredID(MachineDominatorsID); AU.addPreservedID(MachineDominatorsID); AU.addRequired<MachineLoopInfo>(); @@ -165,7 +166,7 @@ void RABasic::releaseMemory() { // that interfere with VirtReg. The newly spilled or split live intervals are // returned by appending them to SplitVRegs. bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs) { + SmallVectorImpl<unsigned> &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. SmallVector<LiveInterval*, 8> Intfs; @@ -219,7 +220,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // minimal, there is no value in caching them outside the scope of // selectOrSplit(). unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs) { + SmallVectorImpl<unsigned> &SplitVRegs) { // Populate a list of physical register spill candidates. SmallVector<unsigned, 8> PhysRegSpillCands; @@ -276,6 +277,11 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>(), getAnalysis<LiveRegMatrix>()); + + calculateSpillWeightsAndHints(*LIS, *MF, + getAnalysis<MachineLoopInfo>(), + getAnalysis<MachineBlockFrequencyInfo>()); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); allocatePhysRegs(); diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index bb9c05c..e92dbd2 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -144,7 +144,7 @@ namespace { // not be erased. bool isBulkSpilling; - enum { + enum LLVM_ENUM_INT_TYPE(unsigned) { spillClean = 1, spillDirty = 100, spillImpossible = ~0u @@ -293,29 +293,26 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // If this register is used by DBG_VALUE then insert new DBG_VALUE to // identify spilled location as the place to find corresponding variable's // value. - SmallVector<MachineInstr *, 4> &LRIDbgValues = + SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; - const MDNode *MDPtr = - DBG->getOperand(DBG->getNumOperands()-1).getMetadata(); - int64_t Offset = 0; - if (DBG->getOperand(1).isImm()) - Offset = DBG->getOperand(1).getImm(); + const MDNode *MDPtr = DBG->getOperand(2).getMetadata(); + bool IsIndirect = DBG->isIndirectDebugValue(); + uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; DebugLoc DL; if (MI == MBB->end()) { // If MI is at basic block end then use last instruction's location. MachineBasicBlock::iterator EI = MI; DL = (--EI)->getDebugLoc(); - } - else + } else DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = - TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) { - MachineBasicBlock *MBB = DBG->getParent(); - MBB->insert(MI, NewDV); - DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); - } + MachineBasicBlock *MBB = DBG->getParent(); + MachineInstr *NewDV = + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(FI).addImm(Offset).addMetadata(MDPtr); + (void)NewDV; + DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); } // Now this register is spilled there is should not be any DBG_VALUE // pointing to this register because they are all pointing to spilled value @@ -572,7 +569,10 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, } // Nothing we can do. Report an error and keep going with a bad allocation. - MI->emitError("ran out of registers during register allocation"); + if (MI->isInlineAsm()) + MI->emitError("inline assembly requires more registers than available"); + else + MI->emitError("ran out of registers during register allocation"); definePhysReg(MI, *AO.begin(), regFree); return assignVirtToPhysReg(VirtReg, *AO.begin()); } @@ -859,25 +859,21 @@ void RAFast::AllocateBasicBlock() { } else { // Modify DBG_VALUE now that the value is in a spill slot. - int64_t Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->isIndirectDebugValue(); + uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *MDPtr = MI->getOperand(MI->getNumOperands()-1).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = - TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << - "\t" << *MI); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - // Scan NewDV operands from the beginning. - MI = NewDV; - ScanDbgValue = true; - break; - } else { - // We can't allocate a physreg for a DebugValue; sorry! - DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(0); - } + MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, + TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SS).addImm(Offset).addMetadata(MDPtr); + DEBUG(dbgs() << "Modifying debug info due to spill:" + << "\t" << *NewDV); + // Scan NewDV operands from the beginning. + MI = NewDV; + ScanDbgValue = true; + break; } } LiveDbgValueMap[Reg].push_back(MI); diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index 9eed1fc..c08d955 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -71,6 +72,7 @@ class RAGreedy : public MachineFunctionPass, // analyses SlotIndexes *Indexes; + MachineBlockFrequencyInfo *MBFI; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; EdgeBundles *Bundles; @@ -118,7 +120,9 @@ class RAGreedy : public MachineFunctionPass, RS_Done }; +#ifndef NDEBUG static const char *const StageName[]; +#endif // RegInfo - Keep additional information about each live range. struct RegInfo { @@ -145,7 +149,7 @@ class RAGreedy : public MachineFunctionPass, void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { ExtraRegInfo.resize(MRI->getNumVirtRegs()); for (;Begin != End; ++Begin) { - unsigned Reg = (*Begin)->reg; + unsigned Reg = *Begin; if (ExtraRegInfo[Reg].Stage == RS_New) ExtraRegInfo[Reg].Stage = NewStage; } @@ -158,6 +162,8 @@ class RAGreedy : public MachineFunctionPass, EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {} + bool isMax() const { return BrokenHints == ~0u; } + bool operator<(const EvictionCost &O) const { if (BrokenHints != O.BrokenHints) return BrokenHints < O.BrokenHints; @@ -216,7 +222,7 @@ class RAGreedy : public MachineFunctionPass, /// class. SmallVector<GlobalSplitCandidate, 32> GlobalCand; - enum { NoCand = ~0u }; + enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u }; /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to /// NoCand which indicates the stack interval. @@ -237,7 +243,7 @@ public: virtual void enqueue(LiveInterval *LI); virtual LiveInterval *dequeue(); virtual unsigned selectOrSplit(LiveInterval&, - SmallVectorImpl<LiveInterval*>&); + SmallVectorImpl<unsigned>&); /// Perform register allocation. virtual bool runOnMachineFunction(MachineFunction &mf); @@ -249,33 +255,34 @@ private: void LRE_WillShrinkVirtReg(unsigned); void LRE_DidCloneVirtReg(unsigned, unsigned); - float calcSpillCost(); - bool addSplitConstraints(InterferenceCache::Cursor, float&); + BlockFrequency calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); void growRegion(GlobalSplitCandidate &Cand); - float calcGlobalSplitCost(GlobalSplitCandidate&); + BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); void calcGapWeights(unsigned, SmallVectorImpl<float>&); + unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); void evictInterference(LiveInterval&, unsigned, - SmallVectorImpl<LiveInterval*>&); + SmallVectorImpl<unsigned>&); unsigned tryAssign(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); + SmallVectorImpl<unsigned>&); unsigned tryEvict(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&, unsigned = ~0u); + SmallVectorImpl<unsigned>&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); + SmallVectorImpl<unsigned>&); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); + SmallVectorImpl<unsigned>&); unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); + SmallVectorImpl<unsigned>&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); + SmallVectorImpl<unsigned>&); unsigned trySplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); + SmallVectorImpl<unsigned>&); }; } // end anonymous namespace @@ -308,7 +315,6 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); @@ -320,6 +326,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); @@ -330,7 +338,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<LiveDebugVariables>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); - AU.addRequired<CalculateSpillWeights>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -407,15 +414,28 @@ void RAGreedy::enqueue(LiveInterval *LI) { // everything else has been allocated. Prio = Size; } else { - // Everything is allocated in long->short order. Long ranges that don't fit - // should be spilled (or split) ASAP so they don't create interference. - Prio = (1u << 31) + Size; + if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() && + LIS->intervalIsInOneMBB(*LI)) { + // Allocate original local ranges in linear instruction order. Since they + // are singly defined, this produces optimal coloring in the absence of + // global interference and other constraints. + Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + } + else { + // Allocate global and split ranges in long->short order. Long ranges that + // don't fit should be spilled (or split) ASAP so they don't create + // interference. Mark a bit to prioritize global above local ranges. + Prio = (1u << 29) + Size; + } + // Mark a higher bit to prioritize global and local above RS_Split. + Prio |= (1u << 31); // Boost ranges that have a physical register hint. if (VRM->hasKnownPreference(Reg)) Prio |= (1u << 30); } - + // The virtual register number is a tie breaker for same-sized ranges. + // Give lower vreg numbers higher priority to assign them first. Queue.push(std::make_pair(Prio, ~Reg)); } @@ -435,7 +455,7 @@ LiveInterval *RAGreedy::dequeue() { /// tryAssign - Try to assign VirtReg to an available register. unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { Order.rewind(); unsigned PhysReg; while ((PhysReg = Order.next())) @@ -476,6 +496,31 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// +unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + unsigned PhysReg; + while ((PhysReg = Order.next())) { + if (PhysReg == PrevReg) + continue; + + MCRegUnitIterator Units(PhysReg, TRI); + for (; Units.isValid(); ++Units) { + // Instantiate a "subquery", not to be confused with the Queries array. + LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]); + if (subQ.checkInterference()) + break; + } + // If no units have interference, break out with the current PhysReg. + if (!Units.isValid()) + break; + } + if (PhysReg) + DEBUG(dbgs() << "can reassign: " << VirtReg << " from " + << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI) + << '\n'); + return PhysReg; +} + /// shouldEvict - determine if A should evict the assigned live range B. The /// eviction policy defined by this function together with the allocation order /// defined by enqueue() decides which registers ultimately end up being split @@ -516,6 +561,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) return false; + bool IsLocal = LIS->intervalIsInOneMBB(VirtReg); + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never // involved in an eviction before. If a cascade number was assigned, deny // evicting anything with the same or a newer cascade number. This prevents @@ -569,8 +616,17 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Abort if this would be too expensive. if (!(Cost < MaxCost)) return false; + if (Urgent) + continue; + // If !MaxCost.isMax(), then we're just looking for a cheap register. + // Evicting another local live range in this case could lead to suboptimal + // coloring. + if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && + !canReassign(*Intf, PhysReg)) { + return false; + } // Finally, apply the eviction policy for non-urgent evictions. - if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) + if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) return false; } } @@ -582,7 +638,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, /// from being assigned to Physreg. This assumes that canEvictInterference /// returned true. void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { // Make sure that VirtReg has a cascade number, and assign that cascade // number to every evicted register. These live ranges than then only be // evicted by a newer cascade, preventing infinite loops. @@ -614,7 +670,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, "Cannot decrease cascade number, illegal eviction"); ExtraRegInfo[Intf->reg].Cascade = Cascade; ++NumEvicted; - NewVRegs.push_back(Intf); + NewVRegs.push_back(Intf->reg); } } @@ -624,7 +680,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, /// @return Physreg to assign VirtReg, or 0. unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs, + SmallVectorImpl<unsigned> &NewVRegs, unsigned CostPerUseLimit) { NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); @@ -699,12 +755,12 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, /// that all preferences in SplitConstraints are met. /// Return false if there are no bundles with positive bias. bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, - float &Cost) { + BlockFrequency &Cost) { ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); // Reset interference dependent info. SplitConstraints.resize(UseBlocks.size()); - float StaticCost = 0; + BlockFrequency StaticCost = 0; for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; @@ -713,7 +769,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, Intf.moveToBlock(BC.Number); BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; - BC.ChangesValue = BI.FirstDef; + BC.ChangesValue = BI.FirstDef.isValid(); if (!Intf.hasInterference()) continue; @@ -742,8 +798,8 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, } // Accumulate the total frequency of inserted spill code. - if (Ins) - StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + while (Ins--) + StaticCost += SpillPlacer->getBlockFrequency(BC.Number); } Cost = StaticCost; @@ -876,7 +932,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { SpillPlacer->prepare(Cand.LiveBundles); // The static split cost will be zero since Cand.Intf reports no interference. - float Cost; + BlockFrequency Cost; if (!addSplitConstraints(Cand.Intf, Cost)) { DEBUG(dbgs() << ", none.\n"); return false; @@ -901,8 +957,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { /// calcSpillCost - Compute how expensive it would be to split the live range in /// SA around all use blocks instead of forming bundle regions. -float RAGreedy::calcSpillCost() { - float Cost = 0; +BlockFrequency RAGreedy::calcSpillCost() { + BlockFrequency Cost = 0; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -921,8 +977,8 @@ float RAGreedy::calcSpillCost() { /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { - float GlobalCost = 0; +BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { + BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -936,8 +992,8 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); if (BI.LiveOut) Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); - if (Ins) - GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + while (Ins--) + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); } for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { @@ -949,8 +1005,10 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { if (RegIn && RegOut) { // We need double spill code if this block has interference. Cand.Intf.moveToBlock(Number); - if (Cand.Intf.hasInterference()) - GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); + if (Cand.Intf.hasInterference()) { + GlobalCost += SpillPlacer->getBlockFrequency(Number); + GlobalCost += SpillPlacer->getBlockFrequency(Number); + } continue; } // live-in / stack-out or stack-in live-out. @@ -1067,7 +1125,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(Reg, LREdit.regs()); + DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); ExtraRegInfo.resize(MRI->getNumVirtRegs()); unsigned OrigBlocks = SA->getNumLiveBlocks(); @@ -1078,7 +1136,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // - Block-local splits are candidates for local splitting. // - DCE leftovers should go back on the queue. for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { - LiveInterval &Reg = *LREdit.get(i); + LiveInterval &Reg = LIS->getInterval(LREdit.get(i)); // Ignore old intervals from DCE. if (getStage(Reg) != RS_New) @@ -1112,10 +1170,10 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, } unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { unsigned NumCands = 0; unsigned BestCand = NoCand; - float BestCost; + BlockFrequency BestCost; SmallVector<unsigned, 8> UsedCands; // Check if we can split this live range around a compact region. @@ -1123,11 +1181,11 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (HasCompact) { // Yes, keep GlobalCand[0] as the compact region candidate. NumCands = 1; - BestCost = HUGE_VALF; + BestCost = BlockFrequency::getMaxFrequency(); } else { // No benefit from the compact region, our fallback will be per-block // splitting. Make sure we find a solution that is cheaper than spilling. - BestCost = Hysteresis * calcSpillCost(); + BestCost = calcSpillCost(); DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); } @@ -1157,7 +1215,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, Cand.reset(IntfCache, PhysReg); SpillPlacer->prepare(Cand.LiveBundles); - float Cost; + BlockFrequency Cost; if (!addSplitConstraints(Cand.Intf, Cost)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; @@ -1193,7 +1251,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, }); if (Cost < BestCost) { BestCand = NumCands; - BestCost = Hysteresis * Cost; // Prevent rounding effects. + BestCost = Cost; } ++NumCands; } @@ -1247,7 +1305,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// creates a lot of local live ranges, that will be split by tryLocalSplit if /// they don't allocate. unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); @@ -1268,14 +1326,14 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, SE->finish(&IntvMap); // Tell LiveDebugVariables about the new ranges. - DebugVars->splitRegister(Reg, LREdit.regs()); + DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); ExtraRegInfo.resize(MRI->getNumVirtRegs()); // Sort out the new intervals created by splitting. The remainder interval // goes straight to spilling, the new local ranges get to stay RS_New. for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { - LiveInterval &LI = *LREdit.get(i); + LiveInterval &LI = LIS->getInterval(LREdit.get(i)); if (getStage(LI) == RS_New && IntvMap[i] == 0) setStage(LI, RS_Spill); } @@ -1299,7 +1357,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// This is similar to spilling to a larger register class. unsigned RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { // There is no point to this if there are no larger sub-classes. if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg))) return 0; @@ -1335,7 +1393,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); ExtraRegInfo.resize(MRI->getNumVirtRegs()); // Assign all new registers to RS_Spill. This was the last chance. @@ -1406,9 +1464,9 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, // Add fixed interference. for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - const LiveInterval &LI = LIS->getRegUnit(*Units); - LiveInterval::const_iterator I = LI.find(StartIdx); - LiveInterval::const_iterator E = LI.end(); + const LiveRange &LR = LIS->getRegUnit(*Units); + LiveRange::const_iterator I = LR.find(StartIdx); + LiveRange::const_iterator E = LR.end(); // Same loop as above. Mark any overlapped gaps as HUGE_VALF. for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { @@ -1419,7 +1477,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; for (; Gap != NumGaps; ++Gap) { - GapWeight[Gap] = HUGE_VALF; + GapWeight[Gap] = llvm::huge_valf; if (Uses[Gap+1].getBaseIndex() >= I->end) break; } @@ -1433,7 +1491,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, /// basic block. /// unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); @@ -1511,7 +1569,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned BestAfter = 0; float BestDiff = 0; - const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber()); + const float blockFreq = + SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * + (1.0f / BlockFrequency::getEntryFrequency()); SmallVector<float, 8> GapWeight; Order.rewind(); @@ -1523,7 +1583,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Remove any gaps with regmask clobbers. if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) - GapWeight[RegMaskGaps[i]] = HUGE_VALF; + GapWeight[RegMaskGaps[i]] = llvm::huge_valf; // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. @@ -1558,7 +1618,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Legally, without causing looping? bool Legal = !ProgressRequired || NewGaps < NumGaps; - if (Legal && MaxGap < HUGE_VALF) { + if (Legal && MaxGap < llvm::huge_valf) { // Estimate the new spill weight. Each instruction reads or writes the // register. Conservatively assume there are no read-modify-write // instructions. @@ -1625,7 +1685,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SE->useIntv(SegStart, SegStop); SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); // If the new range has the same number of instructions as before, mark it as // RS_Split2 so the next split will be forced to make progress. Otherwise, @@ -1638,8 +1698,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(!ProgressRequired && "Didn't make progress when it was required."); for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) if (IntvMap[i] == 1) { - setStage(*LREdit.get(i), RS_Split2); - DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg)); + setStage(LIS->getInterval(LREdit.get(i)), RS_Split2); + DEBUG(dbgs() << PrintReg(LREdit.get(i))); } DEBUG(dbgs() << '\n'); } @@ -1656,7 +1716,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// assignable. /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*>&NewVRegs) { + SmallVectorImpl<unsigned>&NewVRegs) { // Ranges must be Split2 or less. if (getStage(VirtReg) >= RS_Spill) return 0; @@ -1705,7 +1765,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, //===----------------------------------------------------------------------===// unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) @@ -1730,7 +1790,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, if (Stage < RS_Split) { setStage(VirtReg, RS_Split); DEBUG(dbgs() << "wait for second round\n"); - NewVRegs.push_back(&VirtReg); + NewVRegs.push_back(VirtReg.reg); return 0; } @@ -1770,6 +1830,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { getAnalysis<LiveIntervals>(), getAnalysis<LiveRegMatrix>()); Indexes = &getAnalysis<SlotIndexes>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); DomTree = &getAnalysis<MachineDominatorTree>(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); Loops = &getAnalysis<MachineLoopInfo>(); @@ -1777,8 +1838,12 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis<SpillPlacement>(); DebugVars = &getAnalysis<LiveDebugVariables>(); + calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); + + DEBUG(LIS->dump()); + SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); - SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI)); ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index 15a88e2..88c8201 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -94,9 +95,7 @@ public: : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); } @@ -130,8 +129,8 @@ private: const TargetMachine *tm; const TargetRegisterInfo *tri; const TargetInstrInfo *tii; - const MachineLoopInfo *loopInfo; MachineRegisterInfo *mri; + const MachineBlockFrequencyInfo *mbfi; OwningPtr<Spiller> spiller; LiveIntervals *lis; @@ -158,13 +157,13 @@ char RegAllocPBQP::ID = 0; } // End anonymous namespace. -unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const { +unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::NodeId node) const { Node2VReg::const_iterator vregItr = node2VReg.find(node); assert(vregItr != node2VReg.end() && "No vreg for node."); return vregItr->second; } -PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const { +PBQP::Graph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const { VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); assert(nodeItr != vreg2Node.end() && "No node for vreg."); return nodeItr->second; @@ -188,7 +187,7 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { } PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, - const MachineLoopInfo *loopInfo, + const MachineBlockFrequencyInfo *mbfi, const RegSet &vregs) { LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); @@ -247,7 +246,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, } // Construct the node. - PBQP::Graph::NodeItr node = + PBQP::Graph::NodeId node = g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); // Record the mapping and allowed set in the problem. @@ -273,7 +272,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, assert(!l2.empty() && "Empty interval in vreg set?"); if (l1.overlaps(l2)) { - PBQP::Graph::EdgeItr edge = + PBQP::Graph::EdgeId edge = g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0)); @@ -313,10 +312,10 @@ void PBQPBuilder::addInterferenceCosts( PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, const LiveIntervals *lis, - const MachineLoopInfo *loopInfo, + const MachineBlockFrequencyInfo *mbfi, const RegSet &vregs) { - OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, loopInfo, vregs)); + OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs)); PBQP::Graph &g = p->getGraph(); const TargetMachine &tm = mf->getTarget(); @@ -350,7 +349,7 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, PBQP::PBQPNum cBenefit = copyFactor * LiveIntervals::getSpillWeight(false, true, - loopInfo->getLoopDepth(mbb)); + mbfi->getBlockFreq(mbb)); if (cp.isPhys()) { if (!mf->getRegInfo().isAllocatable(dst)) { @@ -364,16 +363,16 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, } if (pregOpt < allowed.size()) { ++pregOpt; // +1 to account for spill option. - PBQP::Graph::NodeItr node = p->getNodeForVReg(src); + PBQP::Graph::NodeId node = p->getNodeForVReg(src); addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit); } } else { const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst); const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src); - PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst); - PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src); - PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2); - if (edge == g.edgesEnd()) { + PBQP::Graph::NodeId node1 = p->getNodeForVReg(dst); + PBQP::Graph::NodeId node2 = p->getNodeForVReg(src); + PBQP::Graph::EdgeId edge = g.findEdge(node1, node2); + if (edge == g.invalidEdgeId()) { edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1, allowed2->size() + 1, 0)); @@ -432,13 +431,14 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { //au.addRequiredID(SplitCriticalEdgesID); if (customPassID) au.addRequiredID(*customPassID); - au.addRequired<CalculateSpillWeights>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); - au.addRequired<MachineDominatorTree>(); - au.addPreserved<MachineDominatorTree>(); + au.addRequired<MachineBlockFrequencyInfo>(); + au.addPreserved<MachineBlockFrequencyInfo>(); au.addRequired<MachineLoopInfo>(); au.addPreserved<MachineLoopInfo>(); + au.addRequired<MachineDominatorTree>(); + au.addPreserved<MachineDominatorTree>(); au.addRequired<VirtRegMap>(); au.addPreserved<VirtRegMap>(); MachineFunctionPass::getAnalysisUsage(au); @@ -475,11 +475,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, const PBQP::Graph &g = problem.getGraph(); // Iterate over the nodes mapping the PBQP solution to a register // assignment. - for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(), - nodeEnd = g.nodesEnd(); - node != nodeEnd; ++node) { - unsigned vreg = problem.getVRegForNode(node); - unsigned alloc = solution.getSelection(node); + for (PBQP::Graph::NodeItr nodeItr = g.nodesBegin(), + nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + unsigned vreg = problem.getVRegForNode(*nodeItr); + unsigned alloc = solution.getSelection(*nodeItr); if (problem.isPRegOption(vreg, alloc)) { unsigned preg = problem.getPRegForOption(vreg, alloc); @@ -489,7 +489,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, vrm->assignVirt2Phys(vreg, preg); } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); - SmallVector<LiveInterval*, 8> newSpills; + SmallVector<unsigned, 8> newSpills; LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm); spiller->spill(LRE); @@ -500,9 +500,10 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, // allocate. for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); itr != end; ++itr) { - assert(!(*itr)->empty() && "Empty spill range."); - DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " "); - vregsToAlloc.insert((*itr)->reg); + LiveInterval &li = lis->getInterval(*itr); + assert(!li.empty() && "Empty spill range."); + DEBUG(dbgs() << PrintReg(li.reg, tri) << " "); + vregsToAlloc.insert(li.reg); } DEBUG(dbgs() << ")\n"); @@ -546,7 +547,10 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { lis = &getAnalysis<LiveIntervals>(); lss = &getAnalysis<LiveStacks>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); + mbfi = &getAnalysis<MachineBlockFrequencyInfo>(); + + calculateSpillWeightsAndHints(*lis, MF, getAnalysis<MachineLoopInfo>(), + *mbfi); vrm = &getAnalysis<VirtRegMap>(); spiller.reset(createInlineSpiller(*this, MF, *vrm)); @@ -584,7 +588,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); OwningPtr<PBQPRAProblem> problem( - builder->build(mf, lis, loopInfo, vregsToAlloc)); + builder->build(mf, lis, mbfi, vregsToAlloc)); #ifndef NDEBUG if (pbqpDumpGraphs) { diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp index 87382d8..cacd7de 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -40,6 +40,9 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { if (MF->getTarget().getRegisterInfo() != TRI) { TRI = MF->getTarget().getRegisterInfo(); RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); + unsigned NumPSets = TRI->getNumRegPressureSets(); + PSetLimits.reset(new unsigned[NumPSets]); + std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0); Update = true; } @@ -144,3 +147,32 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { RCI.Tag = Tag; } +/// This is not accurate because two overlapping register sets may have some +/// nonoverlapping reserved registers. However, computing the allocation order +/// for all register classes would be too expensive. +unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { + const TargetRegisterClass *RC = 0; + unsigned NumRCUnits = 0; + for (TargetRegisterInfo::regclass_iterator + RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) { + const int *PSetID = TRI->getRegClassPressureSets(*RI); + for (; *PSetID != -1; ++PSetID) { + if ((unsigned)*PSetID == Idx) + break; + } + if (*PSetID == -1) + continue; + + // Found a register class that counts against this pressure set. + // For efficiency, only compute the set order for the largest set. + unsigned NUnits = TRI->getRegClassWeight(*RI).WeightLimit; + if (!RC || NUnits > NumRCUnits) { + RC = *RI; + NumRCUnits = NUnits; + } + } + compute(RC); + unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC); + return TRI->getRegPressureSetLimit(Idx) + - TRI->getRegClassWeight(RC).RegWeight * NReserved; +} diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index d85646d..dd86c1f 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -166,7 +166,8 @@ namespace { /// reMaterializeTrivialDef - If the source of a copy is defined by a /// trivial computation, replace the copy by rematerialize the definition. - bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI); + bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI, + bool &IsDefCopy); /// canJoinPhys - Return true if a physreg copy should be joined. bool canJoinPhys(const CoalescerPair &CP); @@ -397,7 +398,7 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { } void RegisterCoalescer::eliminateDeadDefs() { - SmallVector<LiveInterval*, 8> NewRegs; + SmallVector<unsigned, 8> NewRegs; LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs); } @@ -433,11 +434,11 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); - // BValNo is a value number in B that is defined by a copy from A. 'B3' in + // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. - LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - if (BLR == IntB.end()) return false; - VNInfo *BValNo = BLR->valno; + LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx); + if (BS == IntB.end()) return false; + VNInfo *BValNo = BS->valno; // Get the location that B is defined at. Two options: either this value has // an unknown definition point or it is defined at CopyIdx. If unknown, we @@ -446,10 +447,10 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // AValNo is the value number in A that defines the copy, A3 in the example. SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); - LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); - // The live range might not exist after fun with physreg coalescing. - if (ALR == IntA.end()) return false; - VNInfo *AValNo = ALR->valno; + LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx); + // The live segment might not exist after fun with physreg coalescing. + if (AS == IntA.end()) return false; + VNInfo *AValNo = AS->valno; // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. @@ -458,54 +459,54 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) return false; - // Get the LiveRange in IntB that this value number starts with. - LiveInterval::iterator ValLR = - IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); - if (ValLR == IntB.end()) + // Get the Segment in IntB that this value number starts with. + LiveInterval::iterator ValS = + IntB.FindSegmentContaining(AValNo->def.getPrevSlot()); + if (ValS == IntB.end()) return false; - // Make sure that the end of the live range is inside the same block as + // Make sure that the end of the live segment is inside the same block as // CopyMI. - MachineInstr *ValLREndInst = - LIS->getInstructionFromIndex(ValLR->end.getPrevSlot()); - if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + MachineInstr *ValSEndInst = + LIS->getInstructionFromIndex(ValS->end.getPrevSlot()); + if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent()) return false; - // Okay, we now know that ValLR ends in the same block that the CopyMI - // live-range starts. If there are no intervening live ranges between them in - // IntB, we can merge them. - if (ValLR+1 != BLR) return false; + // Okay, we now know that ValS ends in the same block that the CopyMI + // live-range starts. If there are no intervening live segments between them + // in IntB, we can merge them. + if (ValS+1 != BS) return false; DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI)); - SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; + SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; // We are about to delete CopyMI, so need to remove it as the 'instruction // that defines this value #'. Update the valnum with the new defining // instruction #. BValNo->def = FillerStart; // Okay, we can merge them. We need to insert a new liverange: - // [ValLR.end, BLR.begin) of either value number, then we merge the + // [ValS.end, BS.begin) of either value number, then we merge the // two value numbers. - IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); + IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo)); // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) - IntB.MergeValueNumberInto(BValNo, ValLR->valno); + if (BValNo != ValS->valno) + IntB.MergeValueNumberInto(BValNo, ValS->valno); DEBUG(dbgs() << " result = " << IntB << '\n'); // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. - int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); + int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true); if (UIdx != -1) { - ValLREndInst->getOperand(UIdx).setIsKill(false); + ValSEndInst->getOperand(UIdx).setIsKill(false); } // Rewrite the copy. If the copy instruction was killing the destination // register before the merge, find the last use and trim the live range. That // will also add the isKill marker. CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); - if (ALR->end == CopyIdx) + if (AS->end == CopyIdx) LIS->shrinkToUses(&IntA); ++numExtends; @@ -526,11 +527,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; - LiveInterval::Ranges::iterator BI = - std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); - if (BI != IntB.ranges.begin()) + LiveInterval::iterator BI = + std::upper_bound(IntB.begin(), IntB.end(), AI->start); + if (BI != IntB.begin()) --BI; - for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { + for (; BI != IntB.end() && AI->end >= BI->start; ++BI) { if (BI->valno == BValNo) continue; if (BI->start <= AI->start && BI->end > AI->start) @@ -576,14 +577,12 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, LiveInterval &IntB = LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - // BValNo is a value number in B that is defined by a copy from A. 'B3' in + // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); if (!BValNo || BValNo->def != CopyIdx) return false; - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - // AValNo is the value number in A that defines the copy, A3 in the example. VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); @@ -613,7 +612,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); - if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill()) + if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) return false; // Make sure there are no other definitions of IntB that would reach the @@ -628,8 +627,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end() || ULR->valno != AValNo) + LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); + if (US == IntA.end() || US->valno != AValNo) continue; // If this use is tied to a def, we can't rewrite the register. if (UseMI->isRegTiedToDefOperand(UI.getOperandNo())) @@ -680,8 +679,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; } SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end() || ULR->valno != AValNo) + LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); + if (US == IntA.end() || US->valno != AValNo) continue; // Kill flags are no longer accurate. They are recomputed after RA. UseMO.setIsKill(false); @@ -711,14 +710,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, UseMI->eraseFromParent(); } - // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition + // Extend BValNo by merging in IntA live segments of AValNo. Val# definition // is updated. VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; - IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); + IntB.addSegment(LiveInterval::Segment(AI->start, AI->end, ValNo)); } DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); @@ -731,23 +730,29 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, /// reMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, - MachineInstr *CopyMI) { + MachineInstr *CopyMI, + bool &IsDefCopy) { + IsDefCopy = false; unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); + unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); + unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx(); if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; LiveInterval &SrcInt = LIS->getInterval(SrcReg); - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); - LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); - assert(SrcLR != SrcInt.end() && "Live range not found!"); - VNInfo *ValNo = SrcLR->valno; + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI); + VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn(); + assert(ValNo && "CopyMI input register not live"); if (ValNo->isPHIDef() || ValNo->isUnused()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); if (!DefMI) return false; - assert(DefMI && "Defining instruction disappeared"); + if (DefMI->isCopyLike()) { + IsDefCopy = true; + return false; + } if (!DefMI->isAsCheapAsAMove()) return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) @@ -760,31 +765,41 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, return false; // Only support subregister destinations when the def is read-undef. MachineOperand &DstOperand = CopyMI->getOperand(0); + unsigned CopyDstReg = DstOperand.getReg(); if (DstOperand.getSubReg() && !DstOperand.isUndef()) return false; + + const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { - // Make sure the copy destination register class fits the instruction - // definition register class. The mismatch can happen as a result of earlier - // extract_subreg, insert_subreg, subreg_to_reg coalescing. - const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF); - if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - if (!MRI->constrainRegClass(DstReg, RC)) + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + unsigned NewDstReg = DstReg; + + unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), + DefMI->getOperand(0).getSubReg()); + if (NewDstIdx) + NewDstReg = TRI->getSubReg(DstReg, NewDstIdx); + + // Finally, make sure that the physical subregister that will be + // constructed later is permitted for the instruction. + if (!DefRC->contains(NewDstReg)) return false; - } else if (!RC->contains(DstReg)) - return false; + } else { + // Theoretically, some stack frame reference could exist. Just make sure + // it hasn't actually happened. + assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + "Only expect to deal with virtual or physical registers"); + } } MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = llvm::next(MachineBasicBlock::iterator(CopyMI)); - TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); + TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI); MachineInstr *NewMI = prior(MII); - // The original DefMI may have been a subregister def, but the full register - // class of its destination matches the destination of CopyMI, and CopyMI is - // either a full register def or is read-undef. Therefore we can clear the - // subregister index on the rematerialized instruction. - NewMI->getOperand(0).setSubReg(0); + LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); + CopyMI->eraseFromParent(); + ErasedInstrs.insert(CopyMI); // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). // We need to remember these so we can add intervals once we insert @@ -800,6 +815,47 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, } } + if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + unsigned NewIdx = NewMI->getOperand(0).getSubReg(); + const TargetRegisterClass *RCForInst; + if (NewIdx) + RCForInst = TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), DefRC, + NewIdx); + + if (MRI->constrainRegClass(DstReg, DefRC)) { + // The materialized instruction is quite capable of setting DstReg + // directly, but it may still have a now-trivial subregister index which + // we should clear. + NewMI->getOperand(0).setSubReg(0); + } else if (NewIdx && RCForInst) { + // The subreg index on NewMI is essential; we still have to make sure + // DstReg:idx is in a class that NewMI can use. + MRI->constrainRegClass(DstReg, RCForInst); + } else { + // DstReg is actually incompatible with NewMI, we have to move to a + // super-reg's class. This could come from a sequence like: + // GR32 = MOV32r0 + // GR8 = COPY GR32:sub_8 + MRI->setRegClass(DstReg, CP.getNewRC()); + updateRegDefsUses(DstReg, DstReg, DstIdx); + NewMI->getOperand(0).setSubReg( + TRI->composeSubRegIndices(SrcIdx, DefMI->getOperand(0).getSubReg())); + } + } else if (NewMI->getOperand(0).getReg() != CopyDstReg) { + // The New instruction may be defining a sub-register of what's actually + // been asked for. If so it must implicitly define the whole thing. + assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + "Only expect virtual or physical registers in remat"); + NewMI->getOperand(0).setIsDead(true); + NewMI->addOperand(MachineOperand::CreateReg(CopyDstReg, + true /*IsDef*/, + true /*IsImp*/, + false /*IsKill*/)); + } + + if (NewMI->getOperand(0).getSubReg()) + NewMI->getOperand(0).setIsUndef(); + // CopyMI may have implicit operands, transfer them over to the newly // rematerialized instruction. And update implicit def interval valnos. for (unsigned i = CopyMI->getDesc().getNumOperands(), @@ -814,18 +870,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, } } - LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); - SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { unsigned Reg = NewMIImplDefs[i]; for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) - if (LiveInterval *LI = LIS->getCachedRegUnit(*Units)) - LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); + if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) + LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); } - CopyMI->eraseFromParent(); - ErasedInstrs.insert(CopyMI); DEBUG(dbgs() << "Remat: " << *NewMI); ++NumReMats; @@ -994,7 +1046,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (CP.getSrcReg() == CP.getDstReg()) { LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); - LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI)); + LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(CopyMI)); if (VNInfo *DefVNI = LRQ.valueDefined()) { VNInfo *ReadVNI = LRQ.valueIn(); assert(ReadVNI && "No value before copy and no <undef> flag."); @@ -1015,8 +1067,11 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. - if (reMaterializeTrivialDef(CP, CopyMI)) + bool IsDefCopy; + if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) return true; + if (IsDefCopy) + Again = true; // May be possible to coalesce later. return false; } } else { @@ -1034,8 +1089,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { }); // When possible, let DstReg be the larger interval. - if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() > - LIS->getInterval(CP.getDstReg()).ranges.size()) + if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() > + LIS->getInterval(CP.getDstReg()).size()) CP.flip(); } @@ -1048,10 +1103,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. - if (reMaterializeTrivialDef(CP, CopyMI)) + bool IsDefCopy; + if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) return true; - // If we can eliminate the copy without merging the live ranges, do so now. + // If we can eliminate the copy without merging the live segments, do so + // now. if (!CP.isPartial() && !CP.isPhys()) { if (adjustCopiesBackFrom(CP, CopyMI) || removeCopyByCommutingDef(CP, CopyMI)) { @@ -1099,10 +1156,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ - dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI); - if (!CP.isPhys()) + dbgs() << "\tJoined. Result = "; + if (CP.isPhys()) + dbgs() << PrintReg(CP.getDstReg(), TRI); + else dbgs() << LIS->getInterval(CP.getDstReg()); - dbgs() << '\n'; + dbgs() << '\n'; }); ++numJoins; @@ -1114,8 +1173,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { assert(CP.isPhys() && "Must be a physreg copy"); assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register"); LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); - DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS - << '\n'); + DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); assert(CP.isFlipped() && RHS.containsOneValue() && "Invalid join with reserved register"); @@ -1384,7 +1442,7 @@ VNInfo *JoinVals::stripCopies(VNInfo *VNI) { unsigned Reg = MI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) break; - LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def); + LiveQueryResult LRQ = LIS->getInterval(Reg).Query(VNI->def); if (!LRQ.valueIn()) break; VNI = LRQ.valueIn(); @@ -1435,7 +1493,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // The <read-undef> flag on the def operand means that old lane values are // not important. if (Redef) { - V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn(); + V.RedefVNI = LI.Query(VNI->def).valueIn(); assert(V.RedefVNI && "Instruction is reading nonexistent value"); computeAssignment(V.RedefVNI->id, Other); V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; @@ -1452,7 +1510,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { } // Find the value in Other that overlaps VNI->def, if any. - LiveRangeQuery OtherLRQ(Other.LI, VNI->def); + LiveQueryResult OtherLRQ = Other.LI.Query(VNI->def); // It is possible that both values are defined by the same instruction, or // the values are PHIs defined in the same block. When that happens, the two @@ -1911,8 +1969,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI); JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI); - DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS - << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS + DEBUG(dbgs() << "\t\tRHS = " << RHS + << "\n\t\tLHS = " << LHS << '\n'); // First compute NewVNInfo and the simple value mappings. @@ -1943,8 +2001,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); // Join RHS into LHS. - LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo, - MRI); + LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); // Kill flags are going to be wrong if the live ranges were overlapping. // Eventually, we should simply clear all kill flags when computing live @@ -1959,7 +2016,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // CR_Replace conflicts. DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: " << LHS << '\n'); - LIS->extendToIndices(&LHS, EndPoints); + LIS->extendToIndices(LHS, EndPoints); return true; } @@ -1985,9 +2042,8 @@ struct MBBPriorityInfo { // block (the unsigned), and then on the MBB number. // // EnableGlobalCopies assumes that the primary sort key is loop depth. -static int compareMBBPriority(const void *L, const void *R) { - const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L); - const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R); +static int compareMBBPriority(const MBBPriorityInfo *LHS, + const MBBPriorityInfo *RHS) { // Deeper loops first if (LHS->Depth != RHS->Depth) return LHS->Depth > RHS->Depth ? -1 : 1; @@ -2012,6 +2068,9 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { if (!Copy->isCopy()) return false; + if (Copy->getOperand(1).isUndef()) + return false; + unsigned SrcReg = Copy->getOperand(1).getReg(); unsigned DstReg = Copy->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(SrcReg) @@ -2057,8 +2116,8 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { // are not inherently easier to resolve, but slightly preferable until we // have local live range splitting. In particular this is required by // cmp+jmp macro fusion. - for (MachineBasicBlock::reverse_iterator - MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) { + for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); + MII != E; ++MII) { if (!MII->isCopyLike()) continue; if (isLocalCopy(&(*MII), LIS)) @@ -2142,7 +2201,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); if (EnableGlobalCopies == cl::BOU_UNSET) - JoinGlobalCopies = ST.enableMachineScheduler(); + JoinGlobalCopies = ST.useMachineScheduler(); else JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index 97f22e1..092ecdd 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -25,68 +25,39 @@ using namespace llvm; /// Increase pressure for each pressure set provided by TargetRegisterInfo. static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, - std::vector<unsigned> &MaxSetPressure, - const int *PSet, unsigned Weight) { - for (; *PSet != -1; ++PSet) { - CurrSetPressure[*PSet] += Weight; - if (&CurrSetPressure != &MaxSetPressure - && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) { - MaxSetPressure[*PSet] = CurrSetPressure[*PSet]; - } - } + PSetIterator PSetI) { + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) + CurrSetPressure[*PSetI] += Weight; } /// Decrease pressure for each pressure set provided by TargetRegisterInfo. static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, - const int *PSet, unsigned Weight) { - for (; *PSet != -1; ++PSet) { - assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow"); - CurrSetPressure[*PSet] -= Weight; - } -} - -/// Directly increase pressure only within this RegisterPressure result. -void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI, - const MachineRegisterInfo *MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - increaseSetPressure(MaxSetPressure, MaxSetPressure, - TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); - } - else { - increaseSetPressure(MaxSetPressure, MaxSetPressure, - TRI->getRegUnitPressureSets(Reg), - TRI->getRegUnitWeight(Reg)); - } -} - -/// Directly decrease pressure only within this RegisterPressure result. -void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI, - const MachineRegisterInfo *MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); - } - else { - decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg), - TRI->getRegUnitWeight(Reg)); + PSetIterator PSetI) { + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow"); + CurrSetPressure[*PSetI] -= Weight; } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -static void dumpSetPressure(const std::vector<unsigned> &SetPressure, - const TargetRegisterInfo *TRI) { +void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure, + const TargetRegisterInfo *TRI) { + bool Empty = true; for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) { - if (SetPressure[i] != 0) + if (SetPressure[i] != 0) { dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n'; + Empty = false; + } } + if (Empty) + dbgs() << "\n"; } void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Max Pressure: "; - dumpSetPressure(MaxSetPressure, TRI); + dumpRegSetPressure(MaxSetPressure, TRI); dbgs() << "Live In: "; for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) dbgs() << PrintReg(LiveInRegs[i], TRI) << " "; @@ -98,44 +69,33 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { } void RegPressureTracker::dump() const { - dbgs() << "Curr Pressure: "; - dumpSetPressure(CurrSetPressure, TRI); + if (!isTopClosed() || !isBottomClosed()) { + dbgs() << "Curr Pressure: "; + dumpRegSetPressure(CurrSetPressure, TRI); + } P.dump(TRI); } #endif /// Increase the current pressure as impacted by these registers and bump /// the high water mark if needed. -void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> Regs) { - for (unsigned I = 0, E = Regs.size(); I != E; ++I) { - if (TargetRegisterInfo::isVirtualRegister(Regs[I])) { - const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]); - increaseSetPressure(CurrSetPressure, P.MaxSetPressure, - TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); - } - else { - increaseSetPressure(CurrSetPressure, P.MaxSetPressure, - TRI->getRegUnitPressureSets(Regs[I]), - TRI->getRegUnitWeight(Regs[I])); +void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + PSetIterator PSetI = MRI->getPressureSets(RegUnits[i]); + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + CurrSetPressure[*PSetI] += Weight; + if (CurrSetPressure[*PSetI] > P.MaxSetPressure[*PSetI]) { + P.MaxSetPressure[*PSetI] = CurrSetPressure[*PSetI]; + } } } } /// Simply decrease the current pressure as impacted by these registers. -void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> Regs) { - for (unsigned I = 0, E = Regs.size(); I != E; ++I) { - if (TargetRegisterInfo::isVirtualRegister(Regs[I])) { - const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]); - decreaseSetPressure(CurrSetPressure, - TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); - } - else { - decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]), - TRI->getRegUnitWeight(Regs[I])); - } - } +void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> RegUnits) { + for (unsigned I = 0, E = RegUnits.size(); I != E; ++I) + decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnits[I])); } /// Clear the result so it can be used for another round of pressure tracking. @@ -187,12 +147,30 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) { LiveInRegs.clear(); } -const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const { +const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const { if (TargetRegisterInfo::isVirtualRegister(Reg)) return &LIS->getInterval(Reg); return LIS->getCachedRegUnit(Reg); } +void RegPressureTracker::reset() { + MBB = 0; + LIS = 0; + + CurrSetPressure.clear(); + LiveThruPressure.clear(); + P.MaxSetPressure.clear(); + + if (RequireIntervals) + static_cast<IntervalPressure&>(P).reset(); + else + static_cast<RegionPressure&>(P).reset(); + + LiveRegs.PhysRegs.clear(); + LiveRegs.VirtRegs.clear(); + UntiedDefs.clear(); +} + /// Setup the RegPressureTracker. /// /// TODO: Add support for pressure without LiveIntervals. @@ -200,13 +178,17 @@ void RegPressureTracker::init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, - MachineBasicBlock::const_iterator pos) + MachineBasicBlock::const_iterator pos, + bool ShouldTrackUntiedDefs) { + reset(); + MF = mf; TRI = MF->getTarget().getRegisterInfo(); RCI = rci; MRI = &MF->getRegInfo(); MBB = mbb; + TrackUntiedDefs = ShouldTrackUntiedDefs; if (RequireIntervals) { assert(lis && "IntervalPressure requires LiveIntervals"); @@ -216,16 +198,12 @@ void RegPressureTracker::init(const MachineFunction *mf, CurrPos = pos; CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0); - if (RequireIntervals) - static_cast<IntervalPressure&>(P).reset(); - else - static_cast<RegionPressure&>(P).reset(); P.MaxSetPressure = CurrSetPressure; - LiveRegs.PhysRegs.clear(); LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs()); - LiveRegs.VirtRegs.clear(); LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs()); + if (TrackUntiedDefs) + UntiedDefs.setUniverse(MRI->getNumVirtRegs()); } /// Does this pressure result have a valid top position and live ins. @@ -304,16 +282,36 @@ void RegPressureTracker::closeRegion() { // If both top and bottom are closed, do nothing. } +/// The register tracker is unaware of global liveness so ignores normal +/// live-thru ranges. However, two-address or coalesced chains can also lead +/// to live ranges with no holes. Count these to inform heuristics that we +/// can never drop below this pressure. +void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { + LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0); + assert(isBottomClosed() && "need bottom-up tracking to intialize."); + for (unsigned i = 0, e = P.LiveOutRegs.size(); i < e; ++i) { + unsigned Reg = P.LiveOutRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(Reg) + && !RPTracker.hasUntiedDef(Reg)) { + increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg)); + } + } +} + /// \brief Convenient wrapper for checking membership in RegisterOperands. -static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) { - return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end(); +/// (std::count() doesn't have an early exit). +static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) { + return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end(); } /// Collect this instruction's unique uses and defs into SmallVectors for /// processing defs and uses in order. +/// +/// FIXME: always ignore tied opers class RegisterOperands { const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; + bool IgnoreDead; public: SmallVector<unsigned, 8> Uses; @@ -321,7 +319,8 @@ public: SmallVector<unsigned, 8> DeadDefs; RegisterOperands(const TargetRegisterInfo *tri, - const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {} + const MachineRegisterInfo *mri, bool ID = false): + TRI(tri), MRI(mri), IgnoreDead(ID) {} /// Push this operand's register onto the correct vector. void collect(const MachineOperand &MO) { @@ -330,25 +329,27 @@ public: if (MO.readsReg()) pushRegUnits(MO.getReg(), Uses); if (MO.isDef()) { - if (MO.isDead()) - pushRegUnits(MO.getReg(), DeadDefs); + if (MO.isDead()) { + if (!IgnoreDead) + pushRegUnits(MO.getReg(), DeadDefs); + } else pushRegUnits(MO.getReg(), Defs); } } protected: - void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &Regs) { + void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) { if (TargetRegisterInfo::isVirtualRegister(Reg)) { - if (containsReg(Regs, Reg)) + if (containsReg(RegUnits, Reg)) return; - Regs.push_back(Reg); + RegUnits.push_back(Reg); } else if (MRI->isAllocatable(Reg)) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { - if (containsReg(Regs, *Units)) + if (containsReg(RegUnits, *Units)) continue; - Regs.push_back(*Units); + RegUnits.push_back(*Units); } } } @@ -367,6 +368,56 @@ static void collectOperands(const MachineInstr *MI, RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); } +/// Initialize an array of N PressureDiffs. +void PressureDiffs::init(unsigned N) { + Size = N; + if (N <= Max) { + memset(PDiffArray, 0, N * sizeof(PressureDiff)); + return; + } + Max = Size; + free(PDiffArray); + PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff))); +} + +/// Add a change in pressure to the pressure diff of a given instruction. +void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, + const MachineRegisterInfo *MRI) { + PSetIterator PSetI = MRI->getPressureSets(RegUnit); + int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + // Find an existing entry in the pressure diff for this PSet. + PressureDiff::iterator I = begin(), E = end(); + for (; I != E && I->isValid(); ++I) { + if (I->getPSet() >= *PSetI) + break; + } + // If all pressure sets are more constrained, skip the remaining PSets. + if (I == E) + break; + // Insert this PressureChange. + if (!I->isValid() || I->getPSet() != *PSetI) { + PressureChange PTmp = PressureChange(*PSetI); + for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J) + std::swap(*J,PTmp); + } + // Update the units for this pressure set. + I->setUnitInc(I->getUnitInc() + Weight); + } +} + +/// Record the pressure difference induced by the given operand list. +static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers, + const MachineRegisterInfo *MRI) { + assert(!PDiff.begin()->isValid() && "stale PDiff"); + + for (unsigned i = 0, e = RegOpers.Defs.size(); i != e; ++i) + PDiff.addPressureChange(RegOpers.Defs[i], true, MRI); + + for (unsigned i = 0, e = RegOpers.Uses.size(); i != e; ++i) + PDiff.addPressureChange(RegOpers.Uses[i], false, MRI); +} + /// Force liveness of registers. void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) { for (unsigned i = 0, e = Regs.size(); i != e; ++i) { @@ -383,7 +434,7 @@ void RegPressureTracker::discoverLiveIn(unsigned Reg) { // At live in discovery, unconditionally increase the high water mark. P.LiveInRegs.push_back(Reg); - P.increase(Reg, TRI, MRI); + increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); } /// Add Reg to the live out set and increase max pressure. @@ -394,11 +445,16 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) { // At live out discovery, unconditionally increase the high water mark. P.LiveOutRegs.push_back(Reg); - P.increase(Reg, TRI, MRI); + increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); } -/// Recede across the previous instruction. -bool RegPressureTracker::recede() { +/// Recede across the previous instruction. If LiveUses is provided, record any +/// RegUnits that are made live by the current instruction's uses. This includes +/// registers that are both defined and used by the instruction. If a pressure +/// difference pointer is provided record the changes is pressure caused by this +/// instruction independent of liveness. +bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, + PressureDiff *PDiff) { // Check for the top of the analyzable region. if (CurrPos == MBB->begin()) { closeRegion(); @@ -431,6 +487,9 @@ bool RegPressureTracker::recede() { RegisterOperands RegOpers(TRI, MRI); collectOperands(CurrPos, RegOpers); + if (PDiff) + collectPDiff(*PDiff, RegOpers, MRI); + // Boost pressure for all dead defs together. increaseRegPressure(RegOpers.DeadDefs); decreaseRegPressure(RegOpers.DeadDefs); @@ -439,10 +498,20 @@ bool RegPressureTracker::recede() { // TODO: consider earlyclobbers? for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { unsigned Reg = RegOpers.Defs[i]; - if (LiveRegs.erase(Reg)) - decreaseRegPressure(Reg); - else - discoverLiveOut(Reg); + bool DeadDef = false; + if (RequireIntervals) { + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + DeadDef = LRQ.isDeadDef(); + } + } + if (!DeadDef) { + if (LiveRegs.erase(Reg)) + decreaseRegPressure(Reg); + else + discoverLiveOut(Reg); + } } // Generate liveness for uses. @@ -451,12 +520,24 @@ bool RegPressureTracker::recede() { if (!LiveRegs.contains(Reg)) { // Adjust liveouts if LiveIntervals are available. if (RequireIntervals) { - const LiveInterval *LI = getInterval(Reg); - if (LI && !LI->killedAt(SlotIdx)) - discoverLiveOut(Reg); + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + if (!LRQ.isKill() && !LRQ.valueDefined()) + discoverLiveOut(Reg); + } } increaseRegPressure(Reg); LiveRegs.insert(Reg); + if (LiveUses && !containsReg(*LiveUses, Reg)) + LiveUses->push_back(Reg); + } + } + if (TrackUntiedDefs) { + for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = RegOpers.Defs[i]; + if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg)) + UntiedDefs.insert(Reg); } } return true; @@ -464,6 +545,8 @@ bool RegPressureTracker::recede() { /// Advance across the current instruction. bool RegPressureTracker::advance() { + assert(!TrackUntiedDefs && "unsupported mode"); + // Check for the bottom of the analyzable region. if (CurrPos == MBB->end()) { closeRegion(); @@ -496,8 +579,8 @@ bool RegPressureTracker::advance() { // Kill liveness at last uses. bool lastUse = false; if (RequireIntervals) { - const LiveInterval *LI = getInterval(Reg); - lastUse = LI && LI->killedAt(SlotIdx); + const LiveRange *LR = getLiveRange(Reg); + lastUse = LR && LR->Query(SlotIdx).isKill(); } else { // Allocatable physregs are always single-use before register rewriting. @@ -533,9 +616,9 @@ bool RegPressureTracker::advance() { static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, ArrayRef<unsigned> NewPressureVec, RegPressureDelta &Delta, - const TargetRegisterInfo *TRI) { - int ExcessUnits = 0; - unsigned PSetID = ~0U; + const RegisterClassInfo *RCI, + ArrayRef<unsigned> LiveThruPressureVec) { + Delta.Excess = PressureChange(); for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) { unsigned POld = OldPressureVec[i]; unsigned PNew = NewPressureVec[i]; @@ -543,7 +626,10 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, if (!PDiff) // No change in this set in the common case. continue; // Only consider change beyond the limit. - unsigned Limit = TRI->getRegPressureSetLimit(i); + unsigned Limit = RCI->getRegPressureSetLimit(i); + if (!LiveThruPressureVec.empty()) + Limit += LiveThruPressureVec[i]; + if (Limit > POld) { if (Limit > PNew) PDiff = 0; // Under the limit @@ -553,13 +639,12 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, else if (Limit > PNew) PDiff = Limit - POld; // Just obeyed limit. - if (std::abs(PDiff) > std::abs(ExcessUnits)) { - ExcessUnits = PDiff; - PSetID = i; + if (PDiff) { + Delta.Excess = PressureChange(i); + Delta.Excess.setUnitInc(PDiff); + break; } } - Delta.Excess.PSetID = PSetID; - Delta.Excess.UnitIncrease = ExcessUnits; } /// Find the max change in max pressure that either surpasses a critical PSet @@ -570,11 +655,11 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, /// RegPressureTracker API change to work with pressure differences. static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec, ArrayRef<unsigned> NewMaxPressureVec, - ArrayRef<PressureElement> CriticalPSets, + ArrayRef<PressureChange> CriticalPSets, ArrayRef<unsigned> MaxPressureLimit, RegPressureDelta &Delta) { - Delta.CriticalMax = PressureElement(); - Delta.CurrentMax = PressureElement(); + Delta.CriticalMax = PressureChange(); + Delta.CurrentMax = PressureChange(); unsigned CritIdx = 0, CritEnd = CriticalPSets.size(); for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) { @@ -583,23 +668,25 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec, if (PNew == POld) // No change in this set in the common case. continue; - while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i) - ++CritIdx; + if (!Delta.CriticalMax.isValid()) { + while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < i) + ++CritIdx; - if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) { - int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease; - if (PDiff > Delta.CriticalMax.UnitIncrease) { - Delta.CriticalMax.PSetID = i; - Delta.CriticalMax.UnitIncrease = PDiff; + if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == i) { + int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].getUnitInc(); + if (PDiff > 0) { + Delta.CriticalMax = PressureChange(i); + Delta.CriticalMax.setUnitInc(PDiff); + } } } - - // Find the greatest increase above MaxPressureLimit. + // Find the first increase above MaxPressureLimit. // (Ignores negative MDiff). - int MDiff = (int)PNew - (int)MaxPressureLimit[i]; - if (MDiff > Delta.CurrentMax.UnitIncrease) { - Delta.CurrentMax.PSetID = i; - Delta.CurrentMax.UnitIncrease = PNew; + if (!Delta.CurrentMax.isValid() && PNew > MaxPressureLimit[i]) { + Delta.CurrentMax = PressureChange(i); + Delta.CurrentMax.setUnitInc(PNew - POld); + if (CritIdx == CritEnd || Delta.CriticalMax.isValid()) + break; } } } @@ -614,7 +701,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); // Account for register pressure similar to RegPressureTracker::recede(). - RegisterOperands RegOpers(TRI, MRI); + RegisterOperands RegOpers(TRI, MRI, /*IgnoreDead=*/true); collectOperands(MI, RegOpers); // Boost max pressure for all dead defs together. @@ -625,8 +712,19 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { // Kill liveness at live defs. for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { unsigned Reg = RegOpers.Defs[i]; - if (!containsReg(RegOpers.Uses, Reg)) - decreaseRegPressure(Reg); + bool DeadDef = false; + if (RequireIntervals) { + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + SlotIndex SlotIdx = LIS->getInstructionIndex(MI); + LiveQueryResult LRQ = LR->Query(SlotIdx); + DeadDef = LRQ.isDeadDef(); + } + } + if (!DeadDef) { + if (!containsReg(RegOpers.Uses, Reg)) + decreaseRegPressure(Reg); + } } // Generate liveness for uses. for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { @@ -648,8 +746,9 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { /// result per-SUnit with enough information to adjust for the current /// scheduling position. But this works as a proof of concept. void RegPressureTracker:: -getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, - ArrayRef<PressureElement> CriticalPSets, +getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, + RegPressureDelta &Delta, + ArrayRef<PressureChange> CriticalPSets, ArrayRef<unsigned> MaxPressureLimit) { // Snapshot Pressure. // FIXME: The snapshot heap space should persist. But I'm planning to @@ -659,15 +758,117 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, bumpUpwardPressure(MI); - computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI, + LiveThruPressure); computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, MaxPressureLimit, Delta); - assert(Delta.CriticalMax.UnitIncrease >= 0 && - Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + assert(Delta.CriticalMax.getUnitInc() >= 0 && + Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure"); // Restore the tracker's state. P.MaxSetPressure.swap(SavedMaxPressure); CurrSetPressure.swap(SavedPressure); + +#ifndef NDEBUG + if (!PDiff) + return; + + // Check if the alternate algorithm yields the same result. + RegPressureDelta Delta2; + getUpwardPressureDelta(MI, *PDiff, Delta2, CriticalPSets, MaxPressureLimit); + if (Delta != Delta2) { + dbgs() << "DELTA: " << *MI; + if (Delta.Excess.isValid()) + dbgs() << "Excess1 " << TRI->getRegPressureSetName(Delta.Excess.getPSet()) + << " " << Delta.Excess.getUnitInc() << "\n"; + if (Delta.CriticalMax.isValid()) + dbgs() << "Critic1 " << TRI->getRegPressureSetName(Delta.CriticalMax.getPSet()) + << " " << Delta.CriticalMax.getUnitInc() << "\n"; + if (Delta.CurrentMax.isValid()) + dbgs() << "CurrMx1 " << TRI->getRegPressureSetName(Delta.CurrentMax.getPSet()) + << " " << Delta.CurrentMax.getUnitInc() << "\n"; + if (Delta2.Excess.isValid()) + dbgs() << "Excess2 " << TRI->getRegPressureSetName(Delta2.Excess.getPSet()) + << " " << Delta2.Excess.getUnitInc() << "\n"; + if (Delta2.CriticalMax.isValid()) + dbgs() << "Critic2 " << TRI->getRegPressureSetName(Delta2.CriticalMax.getPSet()) + << " " << Delta2.CriticalMax.getUnitInc() << "\n"; + if (Delta2.CurrentMax.isValid()) + dbgs() << "CurrMx2 " << TRI->getRegPressureSetName(Delta2.CurrentMax.getPSet()) + << " " << Delta2.CurrentMax.getUnitInc() << "\n"; + llvm_unreachable("RegP Delta Mismatch"); + } +#endif +} + +/// This is a prototype of the fast version of querying register pressure that +/// does not directly depend on current liveness. It's still slow because we +/// recompute pressure change on-the-fly. This implementation only exists to +/// prove correctness. +/// +/// @param Delta captures information needed for heuristics. +/// +/// @param CriticalPSets Are the pressure sets that are known to exceed some +/// limit within the region, not necessarily at the current position. +/// +/// @param MaxPressureLimit Is the max pressure within the region, not +/// necessarily at the current position. +void RegPressureTracker:: +getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, + RegPressureDelta &Delta, + ArrayRef<PressureChange> CriticalPSets, + ArrayRef<unsigned> MaxPressureLimit) const { + unsigned CritIdx = 0, CritEnd = CriticalPSets.size(); + for (PressureDiff::const_iterator + PDiffI = PDiff.begin(), PDiffE = PDiff.end(); + PDiffI != PDiffE && PDiffI->isValid(); ++PDiffI) { + + unsigned PSetID = PDiffI->getPSet(); + unsigned Limit = RCI->getRegPressureSetLimit(PSetID); + if (!LiveThruPressure.empty()) + Limit += LiveThruPressure[PSetID]; + + unsigned POld = CurrSetPressure[PSetID]; + unsigned MOld = P.MaxSetPressure[PSetID]; + unsigned MNew = MOld; + // Ignore DeadDefs here because they aren't captured by PressureChange. + unsigned PNew = POld + PDiffI->getUnitInc(); + assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) && "PSet overflow"); + if (PNew > MOld) + MNew = PNew; + // Check if current pressure has exceeded the limit. + if (!Delta.Excess.isValid()) { + unsigned ExcessInc = 0; + if (PNew > Limit) + ExcessInc = POld > Limit ? PNew - POld : PNew - Limit; + else if (POld > Limit) + ExcessInc = Limit - POld; + if (ExcessInc) { + Delta.Excess = PressureChange(PSetID); + Delta.Excess.setUnitInc(ExcessInc); + } + } + // Check if max pressure has exceeded a critical pressure set max. + if (MNew == MOld) + continue; + if (!Delta.CriticalMax.isValid()) { + while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < PSetID) + ++CritIdx; + + if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) { + int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc(); + if (CritInc > 0 && CritInc <= INT16_MAX) { + Delta.CriticalMax = PressureChange(PSetID); + Delta.CriticalMax.setUnitInc(CritInc); + } + } + } + // Check if max pressure has exceeded the current max. + if (!Delta.CurrentMax.isValid() && MNew > MaxPressureLimit[PSetID]) { + Delta.CurrentMax = PressureChange(PSetID); + Delta.CurrentMax.setUnitInc(MNew - MOld); + } + } } /// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). @@ -713,10 +914,12 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { // FIXME: allow the caller to pass in the list of vreg uses that remain // to be bottom-scheduled to avoid searching uses at each query. SlotIndex CurrIdx = getCurrSlot(); - const LiveInterval *LI = getInterval(Reg); - if (LI && LI->killedAt(SlotIdx) - && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { - decreaseRegPressure(Reg); + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { + decreaseRegPressure(Reg); + } } } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -741,7 +944,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { /// This assumes that the current LiveIn set is sufficient. void RegPressureTracker:: getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, - ArrayRef<PressureElement> CriticalPSets, + ArrayRef<PressureChange> CriticalPSets, ArrayRef<unsigned> MaxPressureLimit) { // Snapshot Pressure. std::vector<unsigned> SavedPressure = CurrSetPressure; @@ -749,11 +952,12 @@ getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, bumpDownwardPressure(MI); - computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI, + LiveThruPressure); computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, MaxPressureLimit, Delta); - assert(Delta.CriticalMax.UnitIncrease >= 0 && - Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + assert(Delta.CriticalMax.getUnitInc() >= 0 && + Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure"); // Restore the tracker's state. P.MaxSetPressure.swap(SavedMaxPressure); diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index f82ccbe..75ebdaa 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -31,9 +31,8 @@ using namespace llvm; /// setUsed - Set the register and its sub-registers as being used. void RegScavenger::setUsed(unsigned Reg) { - RegsAvailable.reset(Reg); - - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) RegsAvailable.reset(*SubRegs); } @@ -45,8 +44,8 @@ bool RegScavenger::isAliasUsed(unsigned Reg) const { } void RegScavenger::initRegState() { - for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) { + for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { I->Reg = 0; I->Restore = NULL; } @@ -105,8 +104,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { } void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { - BV.set(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) BV.set(*SubRegs); } @@ -182,8 +181,8 @@ void RegScavenger::forward() { MachineInstr *MI = MBBI; - for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) { + for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { if (I->Restore != MI) continue; @@ -369,7 +368,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Exclude all the registers being used by the instruction. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { MachineOperand &MO = I->getOperand(i); - if (MO.isReg() && MO.getReg() != 0 && + if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && !TargetRegisterInfo::isVirtualRegister(MO.getReg())) Candidates.reset(MO.getReg()); } diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp index 07e5b47..75e3790 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -64,8 +64,8 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { /// specified node. bool SUnit::addPred(const SDep &D, bool Required) { // If this node already has this depenence, don't add a redundant one. - for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); - I != E; ++I) { + for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) { // Zero-latency weak edges may be added purely for heuristic ordering. Don't // add them if another kind of edge already exists. if (!Required && I->getSUnit() == D.getSUnit()) @@ -77,7 +77,7 @@ bool SUnit::addPred(const SDep &D, bool Required) { // Find the corresponding successor in N. SDep ForwardD = *I; ForwardD.setSUnit(this); - for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(), + for (SmallVectorImpl<SDep>::iterator II = PredSU->Succs.begin(), EE = PredSU->Succs.end(); II != EE; ++II) { if (*II == ForwardD) { II->setLatency(D.getLatency()); @@ -132,8 +132,8 @@ bool SUnit::addPred(const SDep &D, bool Required) { /// the specified node. void SUnit::removePred(const SDep &D) { // Find the matching predecessor. - for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end(); - I != E; ++I) + for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) if (*I == D) { // Find the corresponding successor in N. SDep P = D; diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index e4da6a4..7f1f9c4 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -36,6 +36,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <queue> + using namespace llvm; static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, @@ -98,7 +100,7 @@ static void getUnderlyingObjects(const Value *V, SmallVector<Value *, 4> Objs; GetUnderlyingObjects(const_cast<Value *>(V), Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end(); + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { V = *I; if (!Visited.insert(V)) @@ -116,12 +118,15 @@ static void getUnderlyingObjects(const Value *V, } while (!Working.empty()); } +typedef SmallVector<PointerIntPair<const Value *, 1, bool>, 4> +UnderlyingObjectsVector; + /// getUnderlyingObjectsForInstr - If this machine instr has memory reference /// information and it can be tracked to a normal reference to a known /// object, return the Value for that object. static void getUnderlyingObjectsForInstr(const MachineInstr *MI, - const MachineFrameInfo *MFI, - SmallVectorImpl<std::pair<const Value *, bool> > &Objects) { + const MachineFrameInfo *MFI, + UnderlyingObjectsVector &Objects) { if (!MI->hasOneMemOperand() || !(*MI->memoperands_begin())->getValue() || (*MI->memoperands_begin())->isVolatile()) @@ -134,8 +139,8 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, SmallVector<Value *, 4> Objs; getUnderlyingObjects(V, Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end(); - I != IE; ++I) { + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); + I != IE; ++I) { bool MayAlias = true; V = *I; @@ -155,7 +160,7 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, return; } - Objects.push_back(std::make_pair(V, MayAlias)); + Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); } } @@ -175,14 +180,11 @@ void ScheduleDAGInstrs::finishBlock() { void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount) { + unsigned regioninstrs) { assert(bb == BB && "startBlock should set BB"); RegionBegin = begin; RegionEnd = end; - EndIndex = endcount; - MISUnitMap.clear(); - - ScheduleDAG::clearDAG(); + NumRegionInstrs = regioninstrs; } /// Close the current scheduling region. Don't clear any state in case the @@ -267,13 +269,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { SU->hasPhysRegDefs = true; Dep = SDep(SU, SDep::Data, *Alias); RegUse = UseSU->getInstr(); - Dep.setMinLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/true)); } Dep.setLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/false)); + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, + UseOp)); ST.adjustSchedDependency(SU, UseSU, Dep); UseSU->addPred(Dep); @@ -310,10 +309,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); else { SDep Dep(SU, Kind, /*Reg=*/*Alias); - unsigned OutLatency = - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); - Dep.setMinLatency(OutLatency); - Dep.setLatency(OutLatency); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); DefSU->addPred(Dep); } } @@ -389,10 +386,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { SUnit *DefSU = DefI->SU; if (DefSU != SU && DefSU != &ExitSU) { SDep Dep(SU, SDep::Output, Reg); - unsigned OutLatency = - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); - Dep.setMinLatency(OutLatency); - Dep.setLatency(OutLatency); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); DefSU->addPred(Dep); } DefI->SU = SU; @@ -409,9 +404,19 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); unsigned Reg = MI->getOperand(OperIdx).getReg(); + // Record this local VReg use. + VReg2UseMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, SU)); + // Lookup this operand's reaching definition. assert(LIS && "vreg dependencies requires LiveIntervals"); - LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI)); + LiveQueryResult LRQ + = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); VNInfo *VNI = LRQ.valueIn(); // VNI will be valid because MachineOperand::readsReg() is checked by caller. @@ -427,10 +432,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. int DefOp = Def->findRegisterDefOperandIdx(Reg); - dep.setLatency( - SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false)); - dep.setMinLatency( - SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true)); + dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); @@ -472,8 +474,8 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, SmallVector<Value *, 4> Objs; getUnderlyingObjects(V, Objs); - for (SmallVector<Value *, 4>::iterator I = Objs.begin(), - IE = Objs.end(); I != IE; ++I) { + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), + IE = Objs.end(); I != IE; ++I) { V = *I; if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { @@ -642,8 +644,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, bool isNormalMemory = false) { // If this is a false dependency, // do not add the edge, but rememeber the rejected node. - if (!EnableAASchedMI || - MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); SUb->addPred(Dep); @@ -671,7 +672,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, void ScheduleDAGInstrs::initSUnits() { // We'll be allocating one SUnit for each real instruction in the region, // which is contained within a basic block. - SUnits.reserve(BB->size()); + SUnits.reserve(NumRegionInstrs); for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { MachineInstr *MI = I; @@ -693,10 +694,22 @@ void ScheduleDAGInstrs::initSUnits() { /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, - RegPressureTracker *RPTracker) { + RegPressureTracker *RPTracker, + PressureDiffs *PDiffs) { + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI + : ST.useAA(); + AliasAnalysis *AAForDep = UseAA ? AA : 0; + + MISUnitMap.clear(); + ScheduleDAG::clearDAG(); + // Create an SUnit for each real instruction. initSUnits(); + if (PDiffs) + PDiffs->init(SUnits.size()); + // We build scheduling units by walking a block's instruction list from bottom // to top. @@ -722,10 +735,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Uses.setUniverse(TRI->getNumRegs()); assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); - // FIXME: Allow SparseSet to reserve space for the creation of virtual - // registers during scheduling. Don't artificially inflate the Universe - // because we want to assert that vregs are not created during DAG building. + VRegUses.clear(); VRegDefs.setUniverse(MRI.getNumVirtRegs()); + VRegUses.setUniverse(MRI.getNumVirtRegs()); // Model data dependencies between instructions being scheduled and the // ExitSU. @@ -745,17 +757,18 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, DbgMI = MI; continue; } + SUnit *SU = MISUnitMap[MI]; + assert(SU && "No SUnit mapped to this MI"); + if (RPTracker) { - RPTracker->recede(); + PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0; + RPTracker->recede(/*LiveUses=*/0, PDiff); assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); } assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && "Cannot schedule terminators or labels!"); - SUnit *SU = MISUnitMap[MI]; - assert(SU && "No SUnit mapped to this MI"); - // Add register-based dependencies (data, anti, and output). bool HasVRegDef = false; for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { @@ -833,20 +846,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) ChainLatency = TrueMemOrderLatency; - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes, ChainLatency); } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, TrueMemOrderLatency); } adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, @@ -855,7 +868,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, AliasMemDefs.clear(); AliasMemUses.clear(); } else if (MI->mayStore()) { - SmallVector<std::pair<const Value *, bool>, 4> Objs; + UnderlyingObjectsVector Objs; getUnderlyingObjectsForInstr(MI, MFI, Objs); if (Objs.empty()) { @@ -864,10 +877,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, } bool MayAlias = false; - for (SmallVector<std::pair<const Value *, bool>, 4>::iterator - K = Objs.begin(), KE = Objs.end(); K != KE; ++K) { - const Value *V = K->first; - bool ThisMayAlias = K->second; + for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end(); + K != KE; ++K) { + const Value *V = K->getPointer(); + bool ThisMayAlias = K->getInt(); if (ThisMayAlias) MayAlias = true; @@ -879,7 +892,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MapVector<const Value *, SUnit *>::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, + 0, true); I->second = SU; } else { if (ThisMayAlias) @@ -894,7 +908,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); } @@ -903,11 +917,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); // Add dependence on alias chain, if needed. if (AliasChain) - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); // But we also should check dependent instructions for the // SU in question. adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, @@ -929,7 +943,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { - SmallVector<std::pair<const Value *, bool>, 4> Objs; + UnderlyingObjectsVector Objs; getUnderlyingObjectsForInstr(MI, MFI, Objs); if (Objs.empty()) { @@ -937,7 +951,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // potentially aliasing stores. for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; @@ -945,10 +959,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MayAlias = false; } - for (SmallVector<std::pair<const Value *, bool>, 4>::iterator + for (UnderlyingObjectsVector::iterator J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { - const Value *V = J->first; - bool ThisMayAlias = J->second; + const Value *V = J->getPointer(); + bool ThisMayAlias = J->getInt(); if (ThisMayAlias) MayAlias = true; @@ -959,7 +973,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MapVector<const Value *, SUnit *>::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, + 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); else @@ -969,7 +984,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2e09ec0..43f72c5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -35,6 +35,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -43,6 +45,7 @@ STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); +STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt<bool> @@ -53,6 +56,14 @@ namespace { CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Include global information in alias analysis")); + /// Hidden option to stress test load slicing, i.e., when this option + /// is enabled, load slicing bypasses most of its profitability guards. + static cl::opt<bool> + StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, + cl::desc("Bypass the profitability model of load " + "slicing"), + cl::init(false)); + //------------------------------ DAGCombiner ---------------------------------// class DAGCombiner { @@ -62,6 +73,7 @@ namespace { CodeGenOpt::Level OptLevel; bool LegalOperations; bool LegalTypes; + bool ForCodeSize; // Worklist of all of the nodes that need to be simplified. // @@ -144,6 +156,7 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + bool SliceUpLoad(SDNode *N); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); @@ -154,8 +167,8 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); - void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, - SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, + SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType); /// combine - call the node-specific routine that knows how to fold each @@ -246,18 +259,18 @@ namespace { SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); - SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); + SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); SDValue visitShiftByConstant(SDNode *N, unsigned Amt); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); - SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); - SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, - DebugLoc DL, bool foldBooleans = true); + SDLoc DL, bool foldBooleans = true); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); @@ -267,7 +280,7 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); - SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue TransformFPLoadStorePair(SDNode *N); @@ -279,15 +292,15 @@ namespace { /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, - SmallVector<SDValue, 8> &Aliases); + SmallVectorImpl<SDValue> &Aliases); /// isAlias - Return true if there is any possibility that the two addresses /// overlap. - bool isAlias(SDValue Ptr1, int64_t Size1, + bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, + SDValue Ptr2, int64_t Size2, bool IsVolatile2, const Value *SrcValue2, int SrcValueOffset2, unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const; @@ -299,7 +312,7 @@ namespace { /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, + SDValue &Ptr, int64_t &Size, bool &IsVolatile, const Value *&SrcValue, int &SrcValueOffset, unsigned &SrcValueAlignment, const MDNode *&TBAAInfo) const; @@ -315,8 +328,15 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { + AttributeSet FnAttrs = + DAG.getMachineFunction().getFunction()->getAttributes(); + ForCodeSize = + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + } /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); @@ -326,7 +346,11 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { - return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + if (LHSTy.isVector()) + return LHSTy; + return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) + : TLI.getPointerTy(); } /// isTypeLegal - This method returns true if we are running before type @@ -335,6 +359,12 @@ namespace { if (!LegalTypes) return true; return TLI.isTypeLegal(VT); } + + /// getSetCCResultType - Convenience wrapper around + /// TargetLowering::getSetCCResultType + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(*DAG.getContext(), VT); + } }; } @@ -482,12 +512,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1), Op.getOperand(0)); @@ -501,7 +531,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return Op.getOperand(1); // fold (fneg (fsub A, B)) -> (fsub B, A) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(0)); case ISD::FMUL: @@ -512,24 +542,24 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1)); case ISD::FP_EXTEND: case ISD::FSIN: - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1)); case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); @@ -573,7 +603,7 @@ static bool isOneUseSetCC(SDValue N) { return false; } -SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, +SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { @@ -587,7 +617,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, } if (N0.hasOneUse()) { // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorkList(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); @@ -605,7 +635,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, } if (N1.hasOneUse()) { // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); AddToWorkList(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); @@ -706,7 +736,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { } void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { - DebugLoc dl = Load->getDebugLoc(); + SDLoc dl(Load); EVT VT = Load->getValueType(0); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); @@ -725,7 +755,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) @@ -735,9 +765,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = true; return DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); } unsigned Opc = Op.getOpcode(); @@ -767,7 +795,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) return SDValue(); EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (NewOp.getNode() == 0) @@ -782,7 +810,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (NewOp.getNode() == 0) @@ -845,7 +873,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(Opc, dl, PVT, NN0, NN1)); } @@ -892,7 +920,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); } @@ -923,7 +951,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) { // fold (aext (sext x)) -> (sext x) DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); } return SDValue(); } @@ -948,7 +976,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDNode *N = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); @@ -958,9 +986,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); DEBUG(dbgs() << "\nPromoting "; @@ -1008,7 +1034,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // try and combine it. while (!WorkListContents.empty()) { SDNode *N; - // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // The WorkListOrder holds the SDNodes in order, but it may contain + // duplicates. // In order to avoid a linear scan, we use a set (O(log N)) to hold what the // worklist *should* contain, and check the node we want to visit is should // actually be visited. @@ -1245,7 +1272,7 @@ static SDValue getInputChainForNode(SDNode *N) { if (unsigned NumOps = N->getNumOperands()) { if (N->getOperand(0).getValueType() == MVT::Other) return N->getOperand(0); - else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + if (N->getOperand(NumOps-1).getValueType() == MVT::Other) return N->getOperand(NumOps-1); for (unsigned i = 1; i < NumOps-1; ++i) if (N->getOperand(i).getValueType() == MVT::Other) @@ -1320,7 +1347,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getEntryNode(); } else { // New and improved token factor. - Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, &Ops[0], Ops.size()); } @@ -1350,7 +1377,7 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { } static -SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, +SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1, SelectionDAG &DAG) { EVT VT = N0.getValueType(); SDValue N00 = N0.getOperand(0); @@ -1360,10 +1387,10 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && isa<ConstantSDNode>(N00.getOperand(1))) { // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) - N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, - DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT, + N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT, + DAG.getNode(ISD::SHL, SDLoc(N00), VT, N00.getOperand(0), N01), - DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT, + DAG.getNode(ISD::SHL, SDLoc(N01), VT, N00.getOperand(1), N01)); return DAG.getNode(ISD::ADD, DL, VT, N0, N1); } @@ -1400,7 +1427,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); // fold (add x, 0) -> x if (N1C && N1C->isNullValue()) return N0; @@ -1408,28 +1435,28 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() + (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A if (N1C && N0.getOpcode() == ISD::SUB) if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(N1C->getAPIntValue()+ N0C->getAPIntValue(), VT), N0.getOperand(1)); // reassociate add - SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1); + SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); if (RADD.getNode() != 0) return RADD; // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); // fold (A + (0-B)) -> A-B if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) return N1.getOperand(0); @@ -1439,18 +1466,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold (A+(B-(A+C))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(0)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1).getOperand(1)); // fold (A+(B-(C+A))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(1)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1).getOperand(0)); // fold (A+((B-A)+or-C)) to (B+or-C) if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && N1.getOperand(0).getOpcode() == ISD::SUB && N0 == N1.getOperand(0).getOperand(1)) - return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, N1.getOperand(0).getOperand(0), N1.getOperand(1)); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant @@ -1461,9 +1488,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N11 = N1.getOperand(1); if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, - DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10), - DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, + DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), + DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); } if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) @@ -1481,17 +1508,17 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); } } // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG); + SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG); if (Result.getNode()) return Result; } if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG); + SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG); if (Result.getNode()) return Result; } @@ -1501,8 +1528,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N1.getOperand(0).getOperand(1), N1.getOperand(1))); if (N0.getOpcode() == ISD::SHL && @@ -1510,8 +1537,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0).getOperand(1), N0.getOperand(1))); @@ -1524,7 +1551,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); } } @@ -1533,7 +1560,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.getOperand(0).getValueType() == MVT::i1 && !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } @@ -1550,18 +1577,18 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // If the flag result is dead, turn this into an ADD. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), + return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); // canonicalize constant to RHS. if (N0C && !N1C) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); // fold (addc x, 0) -> x + no carry out if (N1C && N1C->isNullValue()) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; @@ -1574,9 +1601,9 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), + return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); } return SDValue(); @@ -1591,30 +1618,25 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); + return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); return SDValue(); } // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. -static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, bool LegalOperations) { - if (!VT.isVector()) { +static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, + bool LegalOperations, bool LegalTypes) { + if (!VT.isVector()) + return DAG.getConstant(0, VT); + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) return DAG.getConstant(0, VT); - } - if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { - // Produce a vector of zeros. - SDValue El = DAG.getConstant(0, VT.getVectorElementType()); - std::vector<SDValue> Ops(VT.getVectorNumElements(), El); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - &Ops[0], Ops.size()); - } return SDValue(); } @@ -1640,17 +1662,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) if (N1C) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, DAG.getConstant(-N1C->getAPIntValue(), VT)); // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) if (N0C && N0C->isAllOnesValue()) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) return N1.getOperand(1); @@ -1664,7 +1686,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), VT); - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC, N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C @@ -1672,19 +1694,19 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { (N0.getOperand(1).getOpcode() == ISD::SUB || N0.getOperand(1).getOpcode() == ISD::ADD) && N0.getOperand(1).getOperand(0) == N1) - return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(1)); // fold ((A+(C+B))-B) -> A+C if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // fold ((A-(B-C))-C) -> A-B if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // If either operand of a sub is undef, the result is undef @@ -1698,7 +1720,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { // fold (sub Sym, c) -> Sym-c if (N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() - (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 @@ -1720,25 +1742,25 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { // If the flag result is dead, turn this into an SUB. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1), - DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // fold (subc x, x) -> 0 + no borrow if (N0 == N1) return CombineTo(N, DAG.getConstant(0, VT), - DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // fold (subc x, 0) -> x + no borrow if (N1C && N1C->isNullValue()) - return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow if (N0C && N0C->isAllOnesValue()) - return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0), - DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); return SDValue(); @@ -1751,63 +1773,102 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { // fold (sube x, y, false) -> (subc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1); + return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1); return SDValue(); } +/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// elements are all the same constant or undefined. +static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); + if (!C) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs) && + EltVT.getSizeInBits() >= SplatBitSize); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + bool N0IsConst = false; + bool N1IsConst = false; + APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); + N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); + } else { + N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0; + ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() + : APInt(); + N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0; + ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() + : APInt(); } - // fold (mul x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); // fold (mul c1, c2) -> c1*c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); + if (N0IsConst && N1IsConst) + return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); + // canonicalize constant to RHS - if (N0C && !N1C) - return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0); + if (N0IsConst && !N1IsConst) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (N1IsConst && ConstValue1 == 0) return N1; + // We require a splat of the entire scalar bit width for non-contiguous + // bit patterns. + bool IsFullSplat = + ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); + // fold (mul x, 1) -> x + if (N1IsConst && ConstValue1 == 1 && IsFullSplat) + return N0; // fold (mul x, -1) -> 0-x - if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + if (N1IsConst && ConstValue1.isAllOnesValue()) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // fold (mul x, (1 << c)) -> x << c - if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), + if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, + DAG.getConstant(ConstValue1.logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { - unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { + unsigned Log2Val = (-ConstValue1).logBase2(); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, DAG.getConstant(Log2Val, getShiftAmountTy(N0.getValueType())))); } + + APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N1C && N0.getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N0.getOperand(1))) { - SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + if (N1IsConst && N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) { + SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); AddToWorkList(C3.getNode()); - return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } @@ -1816,7 +1877,9 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { { SDValue Sh(0,0), Y(0,0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). - if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && + if (N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1))) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && @@ -1826,24 +1889,25 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } if (Sh.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); } } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - isa<ConstantSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, - DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT, + if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), - DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT, + DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); // reassociate mul - SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1); + SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); if (RMUL.getNode() != 0) return RMUL; @@ -1871,13 +1935,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(), + return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize @@ -1892,19 +1956,19 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register - SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, DAG.getConstant(VT.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); AddToWorkList(SGN.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; - SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, + SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, DAG.getConstant(VT.getSizeInBits() - lg2, getShiftAmountTy(SGN.getValueType()))); - SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); + SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); AddToWorkList(SRL.getNode()); AddToWorkList(ADD.getNode()); // Divide by pow2 - SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, + SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. Otherwise, we must @@ -1913,7 +1977,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SRA; AddToWorkList(SRA.getNode()); - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); } @@ -1952,7 +2016,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); // fold (udiv x, (1 << c)) -> x >>u c if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 @@ -1960,13 +2024,13 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, + SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() .logBase2(), ADDVT)); AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); } } } @@ -2000,19 +2064,19 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); } // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1); + SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); AddToWorkList(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); AddToWorkList(Mul.getNode()); return Sub; } @@ -2040,18 +2104,18 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); // fold (urem x, pow2) -> (and x, pow2-1) if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, DAG.getConstant(N1C->getAPIntValue()-1,VT)); // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { SDValue Add = - DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, + DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); } } } @@ -2059,13 +2123,13 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1); + SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); AddToWorkList(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); AddToWorkList(Mul.getNode()); return Sub; } @@ -2086,14 +2150,14 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, + return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, getShiftAmountTy(N0.getValueType()))); // fold (mulhs x, undef) -> 0 @@ -2124,7 +2188,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -2166,7 +2230,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!HiExists && (!LegalOperations || TLI.isOperationLegal(LoOp, N->getValueType(0)))) { - SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->op_begin(), N->getNumOperands()); return CombineTo(N, Res, Res); } @@ -2176,7 +2240,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!LoExists && (!LegalOperations || TLI.isOperationLegal(HiOp, N->getValueType(1)))) { - SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->op_begin(), N->getNumOperands()); return CombineTo(N, Res, Res); } @@ -2187,7 +2251,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, // If the two computed results can be simplified separately, separate them. if (LoExists) { - SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->op_begin(), N->getNumOperands()); AddToWorkList(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); @@ -2198,7 +2262,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } if (HiExists) { - SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->op_begin(), N->getNumOperands()); AddToWorkList(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); @@ -2216,7 +2280,7 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { if (Res.getNode()) return Res; EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. @@ -2246,7 +2310,7 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { if (Res.getNode()) return Res; EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. @@ -2275,7 +2339,7 @@ SDValue DAGCombiner::visitSMULO(SDNode *N) { // (smulo x, 2) -> (saddo x, x) if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) if (C2->getAPIntValue() == 2) - return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), N->getOperand(0), N->getOperand(0)); return SDValue(); @@ -2285,7 +2349,7 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) { // (umulo x, 2) -> (uaddo x, x) if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) if (C2->getAPIntValue() == 2) - return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N->getOperand(0), N->getOperand(0)); return SDValue(); @@ -2336,11 +2400,11 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { - SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); AddToWorkList(ORNode.getNode()); - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); } // For each of OP in SHL/SRL/SRA/AND... @@ -2350,11 +2414,11 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && N0.getOperand(1) == N1.getOperand(1)) { - SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); AddToWorkList(ORNode.getNode()); - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode, N0.getOperand(1)); } @@ -2372,7 +2436,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); EVT In1Ty = In1.getValueType(); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If both incoming values are integers, and the original types are the // same. if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { @@ -2414,10 +2478,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { } if (SameMask) { - SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0.getOperand(0), N1.getOperand(0)); AddToWorkList(Op.getNode()); - return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, + return DAG.getVectorShuffle(VT, SDLoc(N), Op, DAG.getUNDEF(VT), &SVN0->getMask()[0]); } } @@ -2460,7 +2524,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (N1C && N1C->isAllOnesValue()) return N0; @@ -2469,7 +2533,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, VT); // reassociate and - SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); + SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); if (RAND.getNode() != 0) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D @@ -2483,7 +2547,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { APInt Mask = ~N1C->getAPIntValue(); Mask = Mask.trunc(N0Op0.getValueSizeInBits()); if (DAG.MaskedValueIsZero(N0Op0, Mask)) { - SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0); // Replace uses of the AND with uses of the Zero extend node. @@ -2496,7 +2560,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } - // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must // already be zero by virtue of the width of the base type of the load. // @@ -2573,7 +2637,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue NewLoad(Load, 0); if (Load->getExtensionType() == ISD::EXTLOAD) { NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, - Load->getValueType(0), Load->getDebugLoc(), + Load->getValueType(0), SDLoc(Load), Load->getChain(), Load->getBasePtr(), Load->getOffset(), Load->getMemoryVT(), Load->getMemOperand()); @@ -2604,26 +2668,39 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LL.getValueType().isInteger()) { // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorkList(ORNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorkList(ANDNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorkList(ORNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && + cast<ConstantSDNode>(RR)->isAllOnesValue()) || + (cast<ConstantSDNode>(LR)->isAllOnesValue() && + cast<ConstantSDNode>(RR)->isNullValue()))) { + SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), + LL, DAG.getConstant(1, LL.getValueType())); + AddToWorkList(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(N), VT, ADDNode, + DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); + } // canonicalize equivalent to ll == rl if (LL == RR && LR == RL) { Op1 = ISD::getSetCCSwappedOperands(Op1); @@ -2636,8 +2713,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && TLI.isOperationLegal(ISD::SETCC, - TLI.getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + getSetCCResultType(N0.getSimpleValueType()))))) + return DAG.getSetCC(SDLoc(N), N0.getValueType(), LL, LR, Result); } } @@ -2665,11 +2742,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + MemVT, LN0->getMemOperand()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2687,12 +2762,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, - LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2710,7 +2782,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && - LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { + LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); @@ -2721,11 +2793,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, - LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, + LN0->getChain(), LN0->getBasePtr(), ExtVT, + LN0->getMemOperand()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2748,7 +2818,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, NewPtr, DAG.getConstant(PtrOff, PtrType)); Alignment = MinAlign(Alignment, PtrOff); } @@ -2757,11 +2827,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - Alignment); + Alignment, LN0->getTBAAInfo()); AddToWorkList(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2786,7 +2856,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ADDC |= Mask; if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { SDValue NewAdd = - DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, + DAG.getNode(ISD::ADD, SDLoc(N0), VT, N0.getOperand(0), DAG.getConstant(ADDC, VT)); CombineTo(N0.getNode(), NewAdd); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2797,6 +2867,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) + if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { + SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false); + if (BSwap.getNode()) + return BSwap; + } + return SDValue(); } @@ -2881,17 +2959,27 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (N00 != N10) return SDValue(); - // Make sure everything beyond the low halfword is zero since the SRL 16 - // will clear the top bits. + // Make sure everything beyond the low halfword gets set to zero since the SRL + // 16 will clear the top bits. unsigned OpSizeInBits = VT.getSizeInBits(); - if (DemandHighBits && OpSizeInBits > 16 && - (!LookPassAnd0 || !LookPassAnd1) && - !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) - return SDValue(); + if (DemandHighBits && OpSizeInBits > 16) { + // If the left-shift isn't masked out then the only way this is a bswap is + // if all bits beyond the low 8 are 0. In that case the entire pattern + // reduces to a left shift anyway: leave it for other parts of the combiner. + if (!LookPassAnd0) + return SDValue(); - SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00); + // However, if the right shift isn't masked out then it might be because + // it's not needed. See if we can spot that too. + if (!LookPassAnd1 && + !DAG.MaskedValueIsZero( + N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) + return SDValue(); + } + + SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); if (OpSizeInBits > 16) - Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res, + Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res, DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); return Res; } @@ -2899,7 +2987,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, /// isBSwapHWordElement - Return true if the specified node is an element /// that makes up a 32-bit packed halfword byteswap. i.e. /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) -static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { +static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) { if (!N.getNode()->hasOneUse()) return false; @@ -3024,19 +3112,19 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) return SDValue(); - SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, + SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, SDValue(Parts[0],0)); - // Result of the bswap should be rotated by 16. If it's not legal, than + // Result of the bswap should be rotated by 16. If it's not legal, then // do (x << 16) | (x >> 16). SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) - return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); + return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) - return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), - DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt)); + return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, SDLoc(N), VT, + DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); } SDValue DAGCombiner::visitOR(SDNode *N) { @@ -3076,7 +3164,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x if (N1C && N1C->isNullValue()) return N0; @@ -3096,7 +3184,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); + SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); if (ROR.getNode() != 0) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) @@ -3105,8 +3193,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); } @@ -3121,19 +3209,19 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) if (cast<ConstantSDNode>(LR)->isNullValue() && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(), + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), LR.getValueType(), LL, RL); AddToWorkList(ORNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) if (cast<ConstantSDNode>(LR)->isAllOnesValue() && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(), + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), LR.getValueType(), LL, RL); AddToWorkList(ANDNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } } // canonicalize equivalent to ll == rl @@ -3148,8 +3236,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && TLI.isOperationLegal(ISD::SETCC, - TLI.getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + getSetCCResultType(N0.getValueType()))))) + return DAG.getSetCC(SDLoc(N), N0.getValueType(), LL, LR, Result); } } @@ -3176,15 +3264,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { - SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1.getOperand(0)); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, + return DAG.getNode(ISD::AND, SDLoc(N), VT, X, DAG.getConstant(LHSMask | RHSMask, VT)); } } // See if this is some rotate idiom. - if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) + if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); // Simplify the operands using demanded-bits information. @@ -3217,7 +3305,7 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return 0; @@ -3292,33 +3380,9 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if (LHSMask.getNode() || RHSMask.getNode()) return 0; - // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y) - // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y)) - if (RHSShiftAmt.getOpcode() == ISD::SUB && - LHSShiftAmt == RHSShiftAmt.getOperand(1)) { - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } - } - } - - // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) - // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) - if (LHSShiftAmt.getOpcode() == ISD::SUB && - RHSShiftAmt == LHSShiftAmt.getOperand(1)) { - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - } - } - - // Look for sign/zext/any-extended or truncate cases: + // If the shift amount is sign/zext/any-extended just peel it off. + SDValue LExtOp0 = LHSShiftAmt; + SDValue RExtOp0 = RHSShiftAmt; if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || @@ -3327,37 +3391,31 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { - SDValue LExtOp0 = LHSShiftAmt.getOperand(0); - SDValue RExtOp0 = RHSShiftAmt.getOperand(0); - if (RExtOp0.getOpcode() == ISD::SUB && - RExtOp0.getOperand(1) == LExtOp0) { - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotl x, y) - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotr x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, - LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } - } - } else if (LExtOp0.getOpcode() == ISD::SUB && - RExtOp0 == LExtOp0.getOperand(1)) { - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotr x, y) - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotl x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, - LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - } - } + LExtOp0 = LHSShiftAmt.getOperand(0); + RExtOp0 = RHSShiftAmt.getOperand(0); + } + + if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) { + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotr x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } else if (LExtOp0.getOpcode() == ISD::SUB && + RExtOp0 == LExtOp0.getOperand(1)) { + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotr x, y) + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotl x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, + HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); } return 0; @@ -3396,12 +3454,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x if (N1C && N1C->isNullValue()) return N0; // reassociate xor - SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1); + SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); if (RXOR.getNode() != 0) return RXOR; @@ -3417,9 +3475,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { default: llvm_unreachable("Unhandled SetCC Equivalent!"); case ISD::SETCC: - return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); + return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC); case ISD::SELECT_CC: - return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2), + return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2), N0.getOperand(3), NotCC); } } @@ -3430,10 +3488,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { N0.getNode()->hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); - V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V, + V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, DAG.getConstant(1, V.getValueType())); AddToWorkList(V.getNode()); - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc @@ -3442,10 +3500,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); - return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants @@ -3454,28 +3512,36 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); - return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } + // fold (xor (and x, y), y) -> (and (not x), y) + if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + N0->getOperand(1) == N1) { + SDValue X = N0->getOperand(0); + SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); + AddToWorkList(NotX.getNode()); + return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); + } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) if (N1C && N0.getOpcode() == ISD::XOR) { ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); if (N00C) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1), + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1), DAG.getConstant(N1C->getAPIntValue() ^ N00C->getAPIntValue(), VT)); if (N01C) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(N1C->getAPIntValue() ^ N01C->getAPIntValue(), VT)); } // fold (xor x, x) -> 0 if (N0 == N1) - return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { @@ -3548,17 +3614,17 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { } // Fold the constants, shifting the binop RHS by the shift amount. - SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(), + SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), N->getValueType(0), LHS->getOperand(1), N->getOperand(1)); // Create the new shift. SDValue NewShift = DAG.getNode(N->getOpcode(), - LHS->getOperand(0).getDebugLoc(), + SDLoc(LHS->getOperand(0)), VT, LHS->getOperand(0), N->getOperand(1)); // Create the new binop. - return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS); + return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); } SDValue DAGCombiner::visitSHL(SDNode *N) { @@ -3569,6 +3635,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (shl c1, c2) -> c1<<c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); @@ -3598,10 +3670,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, + DAG.getNode(ISD::AND, SDLoc(N), TruncVT, DAG.getNode(ISD::TRUNCATE, - N->getDebugLoc(), + SDLoc(N), TruncVT, N100), DAG.getConstant(TruncC, TruncVT))); } @@ -3617,7 +3689,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { uint64_t c2 = N1C->getZExtValue(); if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } @@ -3639,13 +3711,34 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (c2 >= OpSizeInBits - InnerShiftSize) { if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT, - DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT, + return DAG.getNode(ISD::SHL, SDLoc(N0), VT, + DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, N0.getOperand(0)->getOperand(0)), DAG.getConstant(c1 + c2, N1.getValueType())); } } + // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) + // Only fold this if the inner zext has no other uses to avoid increasing + // the total number of instructions. + if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + if (c1 < VT.getSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + if (c1 == c2) { + SDValue NewOp0 = N0.getOperand(0); + EVT CountVT = NewOp0.getOperand(1).getValueType(); + SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), + NewOp0, DAG.getConstant(c2, CountVT)); + AddToWorkList(NewSHL.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + } + } + } + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding @@ -3660,14 +3753,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue Shift; if (c2 > c1) { Mask = Mask.shl(c2-c1); - Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), + Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c2-c1, N1.getValueType())); } else { Mask = Mask.lshr(c1-c2); - Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c1-c2, N1.getValueType())); } - return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift, + return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, DAG.getConstant(Mask, VT)); } } @@ -3678,7 +3771,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { VT.getSizeInBits() - N1C->getZExtValue()), VT); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), HiBitsMask); } @@ -3699,6 +3792,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); @@ -3724,7 +3823,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { ExtVT, VT.getVectorNumElements()); if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), DAG.getValueType(ExtVT)); } @@ -3733,7 +3832,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; - return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(Sum, N1C->getValueType(0))); } } @@ -3765,11 +3864,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy(N0.getOperand(0).getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, + SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT, N0.getOperand(0), Amt); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT, + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT, Shift); - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Trunc); } } @@ -3785,11 +3884,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); - return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::AND, N->getDebugLoc(), + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, + DAG.getNode(ISD::AND, SDLoc(N), TruncVT, DAG.getNode(ISD::TRUNCATE, - N->getDebugLoc(), + SDLoc(N), TruncVT, N100), DAG.getConstant(TruncC, TruncVT))); } @@ -3812,9 +3911,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue Amt = DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, + SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, N0.getOperand(0).getOperand(0), Amt); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); } } @@ -3825,7 +3924,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // If the sign bit is known to be zero, switch this to a SRL. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C) { SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); @@ -3844,6 +3943,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (srl c1, c2) -> c1 >>u c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); @@ -3868,7 +3973,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { uint64_t c2 = N1C->getZExtValue(); if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } @@ -3886,8 +3991,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (c1 + OpSizeInBits == InnerShiftSize) { if (c1 + c2 >= InnerShiftSize) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, - DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, + return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, + DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT, N0.getOperand(0)->getOperand(0), DAG.getConstant(c1 + c2, ShiftCountVT))); } @@ -3897,12 +4002,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && N0.getValueSizeInBits() <= 64) { uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(~0ULL >> ShAmt, VT)); } - - // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) + // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? EVT SmallVT = N0.getOperand(0).getValueType(); @@ -3911,11 +4015,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { uint64_t ShiftAmt = N1C->getZExtValue(); - SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, N0.getOperand(0), DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt); + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), + DAG.getConstant(Mask, VT)); } } @@ -3923,7 +4030,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // bit, which is unmodified by sra. if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { if (N0.getOpcode() == ISD::SRA) - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); } // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). @@ -3951,12 +4058,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue Op = N0.getOperand(0); if (ShAmt) { - Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op, + Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); AddToWorkList(Op.getNode()); } - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + return DAG.getNode(ISD::XOR, SDLoc(N), VT, Op, DAG.getConstant(1, VT)); } } @@ -3971,11 +4078,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::AND, N->getDebugLoc(), + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, + DAG.getNode(ISD::AND, SDLoc(N), TruncVT, DAG.getNode(ISD::TRUNCATE, - N->getDebugLoc(), + SDLoc(N), TruncVT, N100), DAG.getConstant(TruncC, TruncVT))); } @@ -4035,7 +4142,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { // fold (ctlz c1) -> c2 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4045,7 +4152,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { // fold (ctlz_zero_undef c1) -> c2 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4055,7 +4162,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { // fold (cttz c1) -> c2 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4065,7 +4172,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { // fold (cttz_zero_undef c1) -> c2 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4075,7 +4182,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { // fold (ctpop c1) -> c2 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } @@ -4100,7 +4207,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return N2; // fold (select C, 1, X) -> (or C, X) if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) if (VT.isInteger() && (VT0 == MVT::i1 || @@ -4110,38 +4217,38 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0, + return DAG.getNode(ISD::XOR, SDLoc(N), VT0, N0, DAG.getConstant(1, VT0)); - XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0, + XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, N0, DAG.getConstant(1, VT0)); AddToWorkList(XORNode.getNode()); if (VT.bitsGT(VT0)) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); } // fold (select C, 0, X) -> (and (not C), X) if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { - SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorkList(NOTNode.getNode()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2); + return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { - SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorkList(NOTNode.getNode()); - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } // fold (select C, X, 0) -> (and C, X) if (VT == MVT::i1 && N2C && N2C->isNullValue()) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or X, Y) if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N1, N2)) @@ -4155,20 +4262,37 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // about, since there is no way to mark an opcode illegal at all value types if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); - return SimplifySelect(N->getDebugLoc(), N0, N1, N2); + return SimplifySelect(SDLoc(N), N0, N1, N2); } return SDValue(); } +static +std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // Split the inputs. + SDValue Lo, Hi, LL, LH, RL, RH; + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); + + return std::make_pair(Lo, Hi); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Canonicalize integer abs. // vselect (setg[te] X, 0), X, -X -> @@ -4201,6 +4325,34 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } + // If the VSELECT result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (N0.getOpcode() == ISD::SETCC) { + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; + llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); + + Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); + Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); + + // Add the new VSELECT nodes to the work list in case they need to be split + // again. + AddToWorkList(Lo.getNode()); + AddToWorkList(Hi.getNode()); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + } + return SDValue(); } @@ -4217,35 +4369,37 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // Determine if the condition we're dealing with is constant - SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), - N0, N1, CC, N->getDebugLoc(), false); - if (SCC.getNode()) AddToWorkList(SCC.getNode()); + SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), + N0, N1, CC, SDLoc(N), false); + if (SCC.getNode()) { + AddToWorkList(SCC.getNode()); - if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { - if (!SCCC->isNullValue()) - return N2; // cond always true -> true val - else - return N3; // cond always false -> false val - } + if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { + if (!SCCC->isNullValue()) + return N2; // cond always true -> true val + else + return N3; // cond always false -> false val + } - // Fold to a simpler select_cc - if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC) - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(), - SCC.getOperand(0), SCC.getOperand(1), N2, N3, - SCC.getOperand(2)); + // Fold to a simpler select_cc + if (SCC.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), N2, N3, + SCC.getOperand(2)); + } // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N2, N3)) return SDValue(N, 0); // Don't revisit N. // fold select_cc into other things, such as min/max/abs - return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC); + return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); } SDValue DAGCombiner::visitSETCC(SDNode *N) { return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), cast<CondCodeSDNode>(N->getOperand(2))->get(), - N->getDebugLoc()); + SDLoc(N)); } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: @@ -4254,7 +4408,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { // mentioned transformation is profitable. static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, unsigned ExtOpc, - SmallVector<SDNode*, 4> &ExtendNodes, + SmallVectorImpl<SDNode *> &ExtendNodes, const TargetLowering &TLI) { bool HasCopyToRegUses = false; bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); @@ -4312,8 +4466,8 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, return true; } -void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, - SDValue Trunc, SDValue ExtLoad, DebugLoc DL, +void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, + SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType) { // Extend SetCC uses if necessary. for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { @@ -4340,12 +4494,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext c1) -> c1 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0); // fold (sext (sext x)) -> (sext x) // fold (sext (aext x)) -> (sext x) if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0.getOperand(0)); if (N0.getOpcode() == ISD::TRUNCATE) { @@ -4379,22 +4533,22 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign // bits, just sext from i32. if (NumSignBits > OpBits-MidBits) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op); } else { // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign // bits, just truncate to i32. if (NumSignBits > OpBits-MidBits) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); } // fold (sext (truncate x)) -> (sextinreg x). if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, N0.getValueType())) { if (OpBits < DestBits) - Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); else if (OpBits > DestBits) - Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op, DAG.getValueType(N0.getValueType())); } } @@ -4412,17 +4566,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4436,15 +4588,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4467,23 +4617,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getMemoryVT(), - LN0->isVolatile(), - LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ExtLoad, DAG.getConstant(Mask, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, - N0.getOperand(0).getDebugLoc(), + SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4494,13 +4641,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(true) == + TLI.getBooleanContents(true) == TargetLowering::ZeroOrNegativeOneBooleanContent) { EVT N0VT = N0.getOperand(0).getValueType(); // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) // if this is the case. - EVT SVT = TLI.getSetCCResultType(N0VT); + EVT SVT = getSetCCResultType(N0VT); // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result @@ -4508,24 +4655,19 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == SVT.getSizeInBits()) - return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - + EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); if (SVT == MatchingVectorType) { - SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); } } @@ -4534,24 +4676,26 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue NegOne = DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); SDValue SCC = - SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!VT.isVector() && (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))) - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, - DAG.getSetCC(N->getDebugLoc(), - TLI.getSetCCResultType(VT), - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), - NegOne, DAG.getConstant(0, VT)); + if (!VT.isVector() && + (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) { + return DAG.getSelect(SDLoc(N), VT, + DAG.getSetCC(SDLoc(N), + getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + NegOne, DAG.getConstant(0, VT)); + } } // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); return SDValue(); } @@ -4600,11 +4744,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext c1) -> c1 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); // fold (zext (zext x)) -> (zext x) // fold (zext (aext x)) -> (zext x) if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); // fold (zext (truncate x)) -> (zext x) or @@ -4623,9 +4767,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { VT.getSizeInBits())); if (TruncatedBits == (KnownZero & TruncatedBits)) { if (VT.bitsGT(Op.getValueType())) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); if (VT.bitsLT(Op.getValueType())) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); return Op; } @@ -4665,13 +4809,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { - Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); AddToWorkList(Op.getNode()); } else if (Op.getValueType().bitsGT(VT)) { - Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); AddToWorkList(Op.getNode()); } - return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), + return DAG.getZeroExtendInReg(Op, SDLoc(N), N0.getValueType().getScalarType()); } @@ -4685,13 +4829,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X); + X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X); } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, X, DAG.getConstant(Mask, VT)); } @@ -4708,18 +4852,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4741,23 +4883,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getMemoryVT(), - LN0->isVolatile(), - LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ExtLoad, DAG.getConstant(Mask, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, - N0.getOperand(0).getDebugLoc(), + SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4772,15 +4911,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4801,11 +4938,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &OneOps[0], OneOps.size())); // If the desired elements are smaller or larger than the source @@ -4818,18 +4955,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), - DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &OneOps[0], OneOps.size())); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDValue SCC = - SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, VT), DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -4852,7 +4989,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return SDValue(); } - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Ensure that the shift amount is wide enough for the shifted value. if (VT.getSizeInBits() >= 256) @@ -4872,14 +5009,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext c1) -> c1 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0); // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND) - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) @@ -4902,8 +5039,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (TruncOp.getValueType() == VT) return TruncOp; // x iff x size == zext size. if (TruncOp.getValueType().bitsGT(VT)) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp); } // Fold (aext (and (trunc x), cst)) -> (and x, cst) @@ -4915,13 +5052,13 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.getValueType())) { SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X); } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, X, DAG.getConstant(Mask, VT)); } @@ -4938,17 +5075,15 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ANY_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4962,14 +5097,12 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4986,7 +5119,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source @@ -5000,16 +5133,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); } } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDValue SCC = - SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, VT), DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) @@ -5030,9 +5163,8 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { assert(CV != 0 && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; - if (NewVal != CVal) { + if (NewVal != CVal) return DAG.getConstant(NewVal, V.getValueType()); - } break; } case ISD::OR: @@ -5056,7 +5188,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { APInt NewMask = Mask << Amt; SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); if (SimplifyLHS.getNode()) - return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(), + return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } } @@ -5160,12 +5292,19 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // For the transform to be legal, the load must produce only two values // (the value loaded and the chain). Don't transform a pre-increment - // load, for example, which produces an extra value. Otherwise the + // load, for example, which produces an extra value. Otherwise the // transformation is not equivalent, and the downstream logic to replace // uses gets things wrong. if (LN0->getNumValues() > 2) return SDValue(); + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. + if (LN0->getExtensionType() != ISD::NON_EXTLOAD && + LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) + return SDValue(); + EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) @@ -5182,22 +5321,22 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); - SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), + SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, LN0->getBasePtr(), DAG.getConstant(PtrOff, PtrType)); AddToWorkList(NewPtr.getNode()); SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign); + LN0->isInvariant(), NewAlign, LN0->getTBAAInfo()); else - Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, + Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - NewAlign); + NewAlign, LN0->getTBAAInfo()); // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); @@ -5216,7 +5355,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (ShLeftAmt >= VT.getSizeInBits()) Result = DAG.getConstant(0, VT); else - Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, + Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT, Result, DAG.getConstant(ShLeftAmt, ShImmTy)); } @@ -5234,7 +5373,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg c1) -> c1 if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) @@ -5242,10 +5381,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), N1); - } // fold (sext_in_reg (sext x)) -> (sext x) // fold (sext_in_reg (aext x)) -> (sext x) @@ -5254,12 +5392,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N00 = N0.getOperand(0); if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) - return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT); + return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT); // fold operands of sext_in_reg based on knowledge that the top bits are not // demanded. @@ -5282,7 +5420,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // extended enough. unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) - return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1)); } } @@ -5294,12 +5432,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); AddToWorkList(ExtLoad.getNode()); @@ -5312,12 +5448,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -5328,7 +5462,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false); if (BSwap.getNode() != 0) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } @@ -5345,21 +5479,21 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0; // fold (truncate c1) -> c1 if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // fold (truncate (ext x)) -> (ext x) or (truncate x) or x if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { if (N0.getOperand(0).getValueType().bitsLT(VT)) // if the source is smaller than the dest, we still need an extend - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); if (N0.getOperand(0).getValueType().bitsGT(VT)) // if the source is larger than the dest, than we just need the truncate - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // if the source and dest are the same type, we can drop both the extend // and the truncate. return N0.getOperand(0); @@ -5391,14 +5525,14 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - EVT IndexTy = N0->getOperand(1).getValueType(); + EVT IndexTy = TLI.getVectorIdxTy(); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); - SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, N0.getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - N->getDebugLoc(), TrTy, V, + SDLoc(N), TrTy, V, DAG.getConstant(Index, IndexTy)); } } @@ -5430,7 +5564,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0], + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0], Opnds.size()); } } @@ -5445,7 +5579,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits())); if (Shorter.getNode()) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) @@ -5488,11 +5622,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { Opnds.push_back(DAG.getUNDEF(VTs[i])); continue; } - SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V); + SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); AddToWorkList(NV.getNode()); Opnds.push_back(NV); } - return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, &Opnds[0], Opnds.size()); } } @@ -5538,7 +5672,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), false, false, false, Align); } @@ -5575,7 +5709,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { - SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0); + SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); if (Res.getNode() != N) { if (!LegalOperations || TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) @@ -5592,7 +5726,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) @@ -5600,20 +5734,22 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile() && - (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && + TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); unsigned Align = TLI.getDataLayout()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { - SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), + SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), OrigAlign); + LN0->isInvariant(), OrigAlign, + LN0->getTBAAInfo()); AddToWorkList(N); CombineTo(N0.getNode(), - DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + DAG.getNode(ISD::BITCAST, SDLoc(N0), N0.getValueType(), Load), Load.getValue(1)); return Load; @@ -5623,20 +5759,20 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. - if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || - (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector() && !N0.getValueType().isVector()) { - SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, + SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); AddToWorkList(NewConv.getNode()); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + return DAG.getNode(ISD::XOR, SDLoc(N), VT, NewConv, DAG.getConstant(SignBit, VT)); assert(N0.getOpcode() == ISD::FABS); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, NewConv, DAG.getConstant(~SignBit, VT)); } @@ -5650,38 +5786,38 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { - SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), IntXVT, N0.getOperand(1)); AddToWorkList(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. unsigned VTWidth = VT.getSizeInBits(); if (OrigXWidth < VTWidth) { - X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); AddToWorkList(X.getNode()); } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. - X = DAG.getNode(ISD::SRL, X.getDebugLoc(), + X = DAG.getNode(ISD::SRL, SDLoc(X), X.getValueType(), X, DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); AddToWorkList(X.getNode()); - X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); AddToWorkList(X.getNode()); } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); - X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, + X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, VT)); AddToWorkList(X.getNode()); - SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); - Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, + Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, VT)); AddToWorkList(Cst.getNode()); - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); + return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); } } @@ -5722,8 +5858,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Due to the FP element handling below calling this routine recursively, // we can end up with a scalar-to-vector node here. if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, - DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, + DAG.getNode(ISD::BITCAST, SDLoc(BV), DstEltVT, BV->getOperand(0))); SmallVector<SDValue, 8> Ops; @@ -5732,12 +5868,12 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // If the vector element type is not legal, the BUILD_VECTOR operands // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) - Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); - Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); + Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5794,7 +5930,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5821,7 +5957,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) // Simply turn this into a SCALAR_TO_VECTOR of the new type. - return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, Ops[0]); OpVal = OpVal.lshr(DstBitSize); } @@ -5831,7 +5967,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5850,10 +5986,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); // fold (fadd A, 0) -> A if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -5861,20 +5997,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(1), N1)); // No FP constant should be created after legalization as Instruction @@ -5883,22 +6019,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // We don't need test this condition for transformation like following, as // the DAG being transformed implies it is legal to take FP constant as // operand. - // + // // (fadd (fmul c, x), x) -> (fmul c+1, x) - // + // bool AllowNewFpConst = (Level < AfterLegalizeDAG); // If allow, fold (fadd (fneg x), x) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { + N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, VT); - } // If allow, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) { + N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) return DAG.getConstantFP(0.0, VT); - } // In unsafe math mode, we can fold chains of FADD's of the same value // into multiplications. This transform is not safe in general because @@ -5910,43 +6044,43 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - // (fadd (fmul c, x), x) -> (fmul c+1, x) + // (fadd (fmul c, x), x) -> (fmul x, c+1) if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP00, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); } - // (fadd (fmul x, c), x) -> (fmul c+1, x) + // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP01, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2) if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(1) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP00, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(1), NewCFP); } - // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP01, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), NewCFP); } } @@ -5955,98 +6089,93 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); - // (fadd x, (fmul c, x)) -> (fmul c+1, x) + // (fadd x, (fmul c, x)) -> (fmul x, c+1) if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP10, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); } - // (fadd x, (fmul x, c)) -> (fmul c+1, x) + // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); } - // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) - if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(1) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2) + if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(1) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP10, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N0.getOperand(1), NewCFP); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N1.getOperand(1), NewCFP); } - // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) - if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) + if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N0.getOperand(0), NewCFP); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N1.getOperand(0), NewCFP); } } if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); - // (fadd (fadd x, x), x) -> (fmul 3.0, x) + // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + (N0.getOperand(0) == N1)) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, DAG.getConstantFP(3.0, VT)); - } } if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); - // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, DAG.getConstantFP(3.0, VT)); - } } - // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) + // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) if (AllowNewFpConst && N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0) == N1.getOperand(0)) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstantFP(4.0, VT)); - } } // FADD -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1); - } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); - } } return SDValue(); @@ -6058,7 +6187,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // fold vector ops if (VT.isVector()) { @@ -6068,7 +6197,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); // fold (fsub A, 0) -> A if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -6101,8 +6230,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N11, DAG, LegalOperations); - else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, - &DAG.getTarget().Options)) + + if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) return GetNegatedExpression(N10, DAG, LegalOperations); } } @@ -6110,27 +6240,25 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // FSUB -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, dl, VT, N1)); - } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), N1.getOperand(1), N0); - } - // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) - if (N0.getOpcode() == ISD::FNEG && + // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0).getOperand(0); @@ -6160,10 +6288,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); // fold (fmul A, 0) -> 0 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -6177,21 +6305,21 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return N0; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -6201,8 +6329,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(1), N1)); return SDValue(); @@ -6215,7 +6343,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (DAG.getTarget().Options.UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) @@ -6224,13 +6352,13 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return N2; } if (N0CFP && N0CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && @@ -6267,21 +6395,17 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } // (fma x, c, x) -> (fmul x, (c+1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(1.0, VT))); - } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(-1.0, VT))); - } return SDValue(); @@ -6303,7 +6427,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { @@ -6320,7 +6444,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, DAG.getConstantFP(Recip, VT)); } @@ -6332,7 +6456,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -6350,7 +6474,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); return SDValue(); } @@ -6363,7 +6487,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { EVT VT = N->getValueType(0); if (N0CFP && N1CFP) // Constant fold - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); if (N1CFP) { const APFloat& V = N1CFP->getValueAPF(); @@ -6371,11 +6495,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) if (!V.isNegative()) { if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); } else { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, - DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0)); + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, + DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); } } @@ -6384,22 +6508,22 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(copysign(x,z), y) -> copysign(x, y) if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); // copysign(x, abs(y)) -> abs(x) if (N1.getOpcode() == ISD::FABS) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // copysign(x, copysign(y,z)) -> copysign(x, z) if (N1.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); return SDValue(); @@ -6416,7 +6540,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) - return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, // but UINT_TO_FP is legal on this target, try to convert. @@ -6424,7 +6548,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to UINT_TO_FP. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); } // The next optimizations are desireable only if SELECT_CC can be lowered. @@ -6442,7 +6566,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> @@ -6455,7 +6579,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } } @@ -6473,7 +6597,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) - return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, // but SINT_TO_FP is legal on this target, try to convert. @@ -6481,7 +6605,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to SINT_TO_FP. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } // The next optimizations are desireable only if SELECT_CC can be lowered. @@ -6499,7 +6623,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } } @@ -6513,7 +6637,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { // fold (fp_to_sint c1fp) -> c1 if (N0CFP) - return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); return SDValue(); } @@ -6525,7 +6649,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { // fold (fp_to_uint c1fp) -> c1 if (N0CFP) - return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); return SDValue(); } @@ -6538,7 +6662,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round c1fp) -> c1fp if (N0CFP) - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); // fold (fp_round (fp_extend x)) -> x if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) @@ -6549,16 +6673,16 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // This is a value preserving truncation if both round's are. bool IsTrunc = N->getConstantOperandVal(1) == 1 && N0.getNode()->getConstantOperandVal(1) == 1; - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), DAG.getIntPtrConstant(IsTrunc)); } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { - SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT, + SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorkList(Tmp.getNode()); - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, Tmp, N0.getOperand(1)); } @@ -6574,7 +6698,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { // fold (fp_round_inreg c1fp) -> c1fp if (N0CFP && isTypeLegal(EVT)) { SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round); } return SDValue(); @@ -6592,7 +6716,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fp_extend c1fp) -> c1fp if (N0CFP) - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. @@ -6601,25 +6725,23 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue In = N0.getOperand(0); if (In.getValueType() == VT) return In; if (VT.bitsLT(In.getValueType())) - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, In, N0.getOperand(1)); - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); } // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), + DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -6650,10 +6772,10 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { - Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, + Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } } @@ -6661,12 +6783,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // (fneg (fmul c, x)) -> (fmul -c, x) if (N0.getOpcode() == ISD::FMUL) { ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - if (CFP1) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + if (CFP1) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); - } } return SDValue(); @@ -6679,7 +6800,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { // fold (fceil c1) -> fceil(c1) if (N0CFP) - return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); } @@ -6691,7 +6812,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { // fold (ftrunc c1) -> ftrunc(c1) if (N0CFP) - return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); return SDValue(); } @@ -6703,7 +6824,7 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) { // fold (ffloor c1) -> ffloor(c1) if (N0CFP) - return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); } @@ -6720,28 +6841,28 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // fold (fabs c1) -> fabs(c1) if (N0CFP) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) return N->getOperand(0); // fold (fabs (fneg x)) -> (fabs x) // fold (fabs (fcopysign x, y)) -> (fabs x) if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. - if (!TLI.isFAbsFree(VT) && + if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { - Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, + Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } } @@ -6765,7 +6886,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (N1.getOpcode() == ISD::SETCC && TLI.isOperationLegalOrCustom(ISD::BR_CC, N1.getOperand(0).getValueType())) { - return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain, N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2); } @@ -6811,12 +6932,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (AndConst.isPowerOf2() && cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { SDValue SetCC = - DAG.getSetCC(N->getDebugLoc(), - TLI.getSetCCResultType(Op0.getValueType()), + DAG.getSetCC(SDLoc(N), + getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, Op0.getValueType()), ISD::SETNE); - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); // Don't add the new BRCond into the worklist or else SimplifySelectCC // will convert it back to (X & C1) >> C2. @@ -6861,7 +6982,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { DAG.ReplaceAllUsesOfValueWith(N1, Tmp); removeFromWorkList(TheXor); DAG.DeleteNode(TheXor); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, Tmp, N2); } @@ -6882,8 +7003,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { EVT SetCCVT = N1.getValueType(); if (LegalTypes) - SetCCVT = TLI.getSetCCResultType(SetCCVT); - SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), + SetCCVT = getSetCCResultType(SetCCVT); + SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, Equal ? ISD::SETEQ : ISD::SETNE); @@ -6892,7 +7013,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { DAG.ReplaceAllUsesOfValueWith(N1, SetCC); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); } } @@ -6913,14 +7034,14 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { // MachineBasicBlock CFG, which is awkward. // Use SimplifySetCC to simplify SETCC's. - SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), - CondLHS, CondRHS, CC->get(), N->getDebugLoc(), + SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), SDLoc(N), false); if (Simp.getNode()) AddToWorkList(Simp.getNode()); // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) - return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, N->getOperand(0), Simp.getOperand(2), Simp.getOperand(0), Simp.getOperand(1), N->getOperand(4)); @@ -7118,10 +7239,10 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDValue Result; if (isLoad) - Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); else - Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; @@ -7156,7 +7277,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // x0 * offset0 + y0 * ptr0 = t0 // knowing that // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) - // + // // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the // indexed load/store and the expresion that needs to be re-written. // @@ -7186,7 +7307,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, - OtherUses[i]->getDebugLoc(), + SDLoc(OtherUses[i]), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); removeFromWorkList(OtherUses[i]); @@ -7278,7 +7399,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -7295,9 +7416,9 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Check for #2 if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) { SDValue Result = isLoad - ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM) - : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; @@ -7403,17 +7524,20 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getMemOperand()->getBaseAlignment()) { SDValue NewLoad = - DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + DAG.getExtLoad(LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), Align); + LD->isVolatile(), LD->isNonTemporal(), Align, + LD->getTBAAInfo()); return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } - if (CombinerAA) { + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -7423,22 +7547,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Replace the chain to void dependency. if (LD->getExtensionType() == ISD::NON_EXTLOAD) { - ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), - BetterChain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD), + BetterChain, Ptr, LD->getMemOperand()); } else { - ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), LD->getValueType(0), - BetterChain, Ptr, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), - LD->isNonTemporal(), - LD->getAlignment()); + BetterChain, Ptr, LD->getMemoryVT(), + LD->getMemOperand()); } // Create token factor to keep old chain connected. - SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplLoad.getValue(1)); // Make sure the new and old chains are cleaned up. @@ -7454,9 +7573,562 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); + // Try to slice up N to more direct loads if the slices are mapped to + // different register banks or pairing can take place. + if (SliceUpLoad(N)) + return SDValue(N, 0); + return SDValue(); } +namespace { +/// \brief Helper structure used to slice a load in smaller loads. +/// Basically a slice is obtained from the following sequence: +/// Origin = load Ty1, Base +/// Shift = srl Ty1 Origin, CstTy Amount +/// Inst = trunc Shift to Ty2 +/// +/// Then, it will be rewriten into: +/// Slice = load SliceTy, Base + SliceOffset +/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 +/// +/// SliceTy is deduced from the number of bits that are actually used to +/// build Inst. +struct LoadedSlice { + /// \brief Helper structure used to compute the cost of a slice. + struct Cost { + /// Are we optimizing for code size. + bool ForCodeSize; + /// Various cost. + unsigned Loads; + unsigned Truncates; + unsigned CrossRegisterBanksCopies; + unsigned ZExts; + unsigned Shift; + + Cost(bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(0), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {} + + /// \brief Get the cost of one isolated slice. + Cost(const LoadedSlice &LS, bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(1), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) { + EVT TruncType = LS.Inst->getValueType(0); + EVT LoadedType = LS.getLoadedType(); + if (TruncType != LoadedType && + !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) + ZExts = 1; + } + + /// \brief Account for slicing gain in the current cost. + /// Slicing provide a few gains like removing a shift or a + /// truncate. This method allows to grow the cost of the original + /// load with the gain from this slice. + void addSliceGain(const LoadedSlice &LS) { + // Each slice saves a truncate. + const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); + if (!TLI.isTruncateFree(LS.Inst->getValueType(0), + LS.Inst->getOperand(0).getValueType())) + ++Truncates; + // If there is a shift amount, this slice gets rid of it. + if (LS.Shift) + ++Shift; + // If this slice can merge a cross register bank copy, account for it. + if (LS.canMergeExpensiveCrossRegisterBankCopy()) + ++CrossRegisterBanksCopies; + } + + Cost &operator+=(const Cost &RHS) { + Loads += RHS.Loads; + Truncates += RHS.Truncates; + CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; + ZExts += RHS.ZExts; + Shift += RHS.Shift; + return *this; + } + + bool operator==(const Cost &RHS) const { + return Loads == RHS.Loads && Truncates == RHS.Truncates && + CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && + ZExts == RHS.ZExts && Shift == RHS.Shift; + } + + bool operator!=(const Cost &RHS) const { return !(*this == RHS); } + + bool operator<(const Cost &RHS) const { + // Assume cross register banks copies are as expensive as loads. + // FIXME: Do we want some more target hooks? + unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; + unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; + // Unless we are optimizing for code size, consider the + // expensive operation first. + if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS) + return ExpensiveOpsLHS < ExpensiveOpsRHS; + return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < + (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); + } + + bool operator>(const Cost &RHS) const { return RHS < *this; } + + bool operator<=(const Cost &RHS) const { return !(RHS < *this); } + + bool operator>=(const Cost &RHS) const { return !(*this < RHS); } + }; + // The last instruction that represent the slice. This should be a + // truncate instruction. + SDNode *Inst; + // The original load instruction. + LoadSDNode *Origin; + // The right shift amount in bits from the original load. + unsigned Shift; + // The DAG from which Origin came from. + // This is used to get some contextual information about legal types, etc. + SelectionDAG *DAG; + + LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL, + unsigned Shift = 0, SelectionDAG *DAG = NULL) + : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} + + LoadedSlice(const LoadedSlice &LS) + : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} + + /// \brief Get the bits used in a chunk of bits \p BitWidth large. + /// \return Result is \p BitWidth and has used bits set to 1 and + /// not used bits set to 0. + APInt getUsedBits() const { + // Reproduce the trunc(lshr) sequence: + // - Start from the truncated value. + // - Zero extend to the desired bit width. + // - Shift left. + assert(Origin && "No original load to compare against."); + unsigned BitWidth = Origin->getValueSizeInBits(0); + assert(Inst && "This slice is not bound to an instruction"); + assert(Inst->getValueSizeInBits(0) <= BitWidth && + "Extracted slice is bigger than the whole type!"); + APInt UsedBits(Inst->getValueSizeInBits(0), 0); + UsedBits.setAllBits(); + UsedBits = UsedBits.zext(BitWidth); + UsedBits <<= Shift; + return UsedBits; + } + + /// \brief Get the size of the slice to be loaded in bytes. + unsigned getLoadedSize() const { + unsigned SliceSize = getUsedBits().countPopulation(); + assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); + return SliceSize / 8; + } + + /// \brief Get the type that will be loaded for this slice. + /// Note: This may not be the final type for the slice. + EVT getLoadedType() const { + assert(DAG && "Missing context"); + LLVMContext &Ctxt = *DAG->getContext(); + return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); + } + + /// \brief Get the alignment of the load used for this slice. + unsigned getAlignment() const { + unsigned Alignment = Origin->getAlignment(); + unsigned Offset = getOffsetFromBase(); + if (Offset != 0) + Alignment = MinAlign(Alignment, Alignment + Offset); + return Alignment; + } + + /// \brief Check if this slice can be rewritten with legal operations. + bool isLegal() const { + // An invalid slice is not legal. + if (!Origin || !Inst || !DAG) + return false; + + // Offsets are for indexed load only, we do not handle that. + if (Origin->getOffset().getOpcode() != ISD::UNDEF) + return false; + + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + + // Check that the type is legal. + EVT SliceType = getLoadedType(); + if (!TLI.isTypeLegal(SliceType)) + return false; + + // Check that the load is legal for this type. + if (!TLI.isOperationLegal(ISD::LOAD, SliceType)) + return false; + + // Check that the offset can be computed. + // 1. Check its type. + EVT PtrType = Origin->getBasePtr().getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + return false; + + // 2. Check that it fits in the immediate. + if (!TLI.isLegalAddImmediate(getOffsetFromBase())) + return false; + + // 3. Check that the computation is legal. + if (!TLI.isOperationLegal(ISD::ADD, PtrType)) + return false; + + // Check that the zext is legal if it needs one. + EVT TruncateType = Inst->getValueType(0); + if (TruncateType != SliceType && + !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) + return false; + + return true; + } + + /// \brief Get the offset in bytes of this slice in the original chunk of + /// bits. + /// \pre DAG != NULL. + uint64_t getOffsetFromBase() const { + assert(DAG && "Missing context."); + bool IsBigEndian = + DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian(); + assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); + uint64_t Offset = Shift / 8; + unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; + assert(!(Origin->getValueSizeInBits(0) & 0x7) && + "The size of the original loaded type is not a multiple of a" + " byte."); + // If Offset is bigger than TySizeInBytes, it means we are loading all + // zeros. This should have been optimized before in the process. + assert(TySizeInBytes > Offset && + "Invalid shift amount for given loaded size"); + if (IsBigEndian) + Offset = TySizeInBytes - Offset - getLoadedSize(); + return Offset; + } + + /// \brief Generate the sequence of instructions to load the slice + /// represented by this object and redirect the uses of this slice to + /// this new sequence of instructions. + /// \pre this->Inst && this->Origin are valid Instructions and this + /// object passed the legal check: LoadedSlice::isLegal returned true. + /// \return The last instruction of the sequence used to load the slice. + SDValue loadSlice() const { + assert(Inst && Origin && "Unable to replace a non-existing slice."); + const SDValue &OldBaseAddr = Origin->getBasePtr(); + SDValue BaseAddr = OldBaseAddr; + // Get the offset in that chunk of bytes w.r.t. the endianess. + int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); + assert(Offset >= 0 && "Offset too big to fit in int64_t!"); + if (Offset) { + // BaseAddr = BaseAddr + Offset. + EVT ArithType = BaseAddr.getValueType(); + BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr, + DAG->getConstant(Offset, ArithType)); + } + + // Create the type of the loaded slice according to its size. + EVT SliceType = getLoadedType(); + + // Create the load for the slice. + SDValue LastInst = DAG->getLoad( + SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, + Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(), + Origin->isNonTemporal(), Origin->isInvariant(), getAlignment()); + // If the final type is not the same as the loaded type, this means that + // we have to pad with zero. Create a zero extend for that. + EVT FinalType = Inst->getValueType(0); + if (SliceType != FinalType) + LastInst = + DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); + return LastInst; + } + + /// \brief Check if this slice can be merged with an expensive cross register + /// bank copy. E.g., + /// i = load i32 + /// f = bitcast i32 i to float + bool canMergeExpensiveCrossRegisterBankCopy() const { + if (!Inst || !Inst->hasOneUse()) + return false; + SDNode *Use = *Inst->use_begin(); + if (Use->getOpcode() != ISD::BITCAST) + return false; + assert(DAG && "Missing context"); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + EVT ResVT = Use->getValueType(0); + const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); + const TargetRegisterClass *ArgRC = + TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); + if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // At this point, we know that we perform a cross-register-bank copy. + // Check if it is expensive. + const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo(); + // Assume bitcasts are cheap, unless both register classes do not + // explicitly share a common sub class. + if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) + return false; + + // Check if it will be merged with the load. + // 1. Check the alignment constraint. + unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment( + ResVT.getTypeForEVT(*DAG->getContext())); + + if (RequiredAlignment > getAlignment()) + return false; + + // 2. Check that the load is a legal operation for that type. + if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // 3. Check that we do not have a zext in the way. + if (Inst->getValueType(0) != getLoadedType()) + return false; + + return true; + } +}; +} + +/// \brief Sorts LoadedSlice according to their offset. +struct LoadedSliceSorter { + bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) { + assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); + return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); + } +}; + +/// \brief Check that all bits set in \p UsedBits form a dense region, i.e., +/// \p UsedBits looks like 0..0 1..1 0..0. +static bool areUsedBitsDense(const APInt &UsedBits) { + // If all the bits are one, this is dense! + if (UsedBits.isAllOnesValue()) + return true; + + // Get rid of the unused bits on the right. + APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros()); + // Get rid of the unused bits on the left. + if (NarrowedUsedBits.countLeadingZeros()) + NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); + // Check that the chunk of bits is completely used. + return NarrowedUsedBits.isAllOnesValue(); +} + +/// \brief Check whether or not \p First and \p Second are next to each other +/// in memory. This means that there is no hole between the bits loaded +/// by \p First and the bits loaded by \p Second. +static bool areSlicesNextToEachOther(const LoadedSlice &First, + const LoadedSlice &Second) { + assert(First.Origin == Second.Origin && First.Origin && + "Unable to match different memory origins."); + APInt UsedBits = First.getUsedBits(); + assert((UsedBits & Second.getUsedBits()) == 0 && + "Slices are not supposed to overlap."); + UsedBits |= Second.getUsedBits(); + return areUsedBitsDense(UsedBits); +} + +/// \brief Adjust the \p GlobalLSCost according to the target +/// paring capabilities and the layout of the slices. +/// \pre \p GlobalLSCost should account for at least as many loads as +/// there is in the slices in \p LoadedSlices. +static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, + LoadedSlice::Cost &GlobalLSCost) { + unsigned NumberOfSlices = LoadedSlices.size(); + // If there is less than 2 elements, no pairing is possible. + if (NumberOfSlices < 2) + return; + + // Sort the slices so that elements that are likely to be next to each + // other in memory are next to each other in the list. + std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter()); + const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); + // First (resp. Second) is the first (resp. Second) potentially candidate + // to be placed in a paired load. + const LoadedSlice *First = NULL; + const LoadedSlice *Second = NULL; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, + // Set the beginning of the pair. + First = Second) { + + Second = &LoadedSlices[CurrSlice]; + + // If First is NULL, it means we start a new pair. + // Get to the next slice. + if (!First) + continue; + + EVT LoadedType = First->getLoadedType(); + + // If the types of the slices are different, we cannot pair them. + if (LoadedType != Second->getLoadedType()) + continue; + + // Check if the target supplies paired loads for this type. + unsigned RequiredAlignment = 0; + if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { + // move to the next pair, this type is hopeless. + Second = NULL; + continue; + } + // Check if we meet the alignment requirement. + if (RequiredAlignment > First->getAlignment()) + continue; + + // Check that both loads are next to each other in memory. + if (!areSlicesNextToEachOther(*First, *Second)) + continue; + + assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); + --GlobalLSCost.Loads; + // Move to the next pair. + Second = NULL; + } +} + +/// \brief Check the profitability of all involved LoadedSlice. +/// Currently, it is considered profitable if there is exactly two +/// involved slices (1) which are (2) next to each other in memory, and +/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). +/// +/// Note: The order of the elements in \p LoadedSlices may be modified, but not +/// the elements themselves. +/// +/// FIXME: When the cost model will be mature enough, we can relax +/// constraints (1) and (2). +static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, + const APInt &UsedBits, bool ForCodeSize) { + unsigned NumberOfSlices = LoadedSlices.size(); + if (StressLoadSlicing) + return NumberOfSlices > 1; + + // Check (1). + if (NumberOfSlices != 2) + return false; + + // Check (2). + if (!areUsedBitsDense(UsedBits)) + return false; + + // Check (3). + LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); + // The original code has one big load. + OrigCost.Loads = 1; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { + const LoadedSlice &LS = LoadedSlices[CurrSlice]; + // Accumulate the cost of all the slices. + LoadedSlice::Cost SliceCost(LS, ForCodeSize); + GlobalSlicingCost += SliceCost; + + // Account as cost in the original configuration the gain obtained + // with the current slices. + OrigCost.addSliceGain(LS); + } + + // If the target supports paired load, adjust the cost accordingly. + adjustCostForPairing(LoadedSlices, GlobalSlicingCost); + return OrigCost > GlobalSlicingCost; +} + +/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) +/// operations, split it in the various pieces being extracted. +/// +/// This sort of thing is introduced by SROA. +/// This slicing takes care not to insert overlapping loads. +/// \pre LI is a simple load (i.e., not an atomic or volatile load). +bool DAGCombiner::SliceUpLoad(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + LoadSDNode *LD = cast<LoadSDNode>(N); + if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + !LD->getValueType(0).isInteger()) + return false; + + // Keep track of already used bits to detect overlapping values. + // In that case, we will just abort the transformation. + APInt UsedBits(LD->getValueSizeInBits(0), 0); + + SmallVector<LoadedSlice, 4> LoadedSlices; + + // Check if this load is used as several smaller chunks of bits. + // Basically, look for uses in trunc or trunc(lshr) and record a new chain + // of computation for each trunc. + for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); + UI != UIEnd; ++UI) { + // Skip the uses of the chain. + if (UI.getUse().getResNo() != 0) + continue; + + SDNode *User = *UI; + unsigned Shift = 0; + + // Check if this is a trunc(lshr). + if (User->getOpcode() == ISD::SRL && User->hasOneUse() && + isa<ConstantSDNode>(User->getOperand(1))) { + Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); + User = *User->use_begin(); + } + + // At this point, User is a Truncate, iff we encountered, trunc or + // trunc(lshr). + if (User->getOpcode() != ISD::TRUNCATE) + return false; + + // The width of the type must be a power of 2 and greater than 8-bits. + // Otherwise the load cannot be represented in LLVM IR. + // Moreover, if we shifted with a non 8-bits multiple, the slice + // will be accross several bytes. We do not support that. + unsigned Width = User->getValueSizeInBits(0); + if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) + return 0; + + // Build the slice for this chain of computations. + LoadedSlice LS(User, LD, Shift, &DAG); + APInt CurrentUsedBits = LS.getUsedBits(); + + // Check if this slice overlaps with another. + if ((CurrentUsedBits & UsedBits) != 0) + return false; + // Update the bits used globally. + UsedBits |= CurrentUsedBits; + + // Check if the new slice would be legal. + if (!LS.isLegal()) + return false; + + // Record the slice. + LoadedSlices.push_back(LS); + } + + // Abort slicing if it does not seem to be profitable. + if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) + return false; + + ++SlicedLoads; + + // Rewrite each chain to use an independent load. + // By construction, each chain can be represented by a unique load. + + // Prepare the argument for the new token factor for all the slices. + SmallVector<SDValue, 8> ArgChains; + for (SmallVectorImpl<LoadedSlice>::const_iterator + LSIt = LoadedSlices.begin(), + LSItEnd = LoadedSlices.end(); + LSIt != LSItEnd; ++LSIt) { + SDValue SliceInst = LSIt->loadSlice(); + CombineTo(LSIt->Inst, SliceInst, true); + if (SliceInst.getNode()->getOpcode() != ISD::LOAD) + SliceInst = SliceInst.getOperand(0); + assert(SliceInst->getOpcode() == ISD::LOAD && + "It takes more than a zext to get to the loaded slice!!"); + ArgChains.push_back(SliceInst.getValue(1)); + } + + SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, + &ArgChains[0], ArgChains.size()); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); + return true; +} + /// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the /// load is having specific bytes cleared out. If so, return the byte size /// being masked out and the shift amount. @@ -7500,9 +8172,9 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { // 0 and the bits being kept are 1. Use getSExtValue so that leading bits // follow the sign bit for uniformity. uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); - unsigned NotMaskLZ = CountLeadingZeros_64(NotMask); + unsigned NotMaskLZ = countLeadingZeros(NotMask); if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. - unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); + unsigned NotMaskTZ = countTrailingZeros(NotMask); if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. if (NotMaskLZ == 64) return Result; // All zero mask. @@ -7559,7 +8231,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. if (ByteShift) - IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal, + IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal, DAG.getConstant(ByteShift*8, DC->getShiftAmountTy(IVal.getValueType()))); @@ -7574,16 +8246,16 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue Ptr = St->getBasePtr(); if (StOffset) { - Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), + Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(), Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); NewAlign = MinAlign(NewAlign, StOffset); } // Truncate down to the new size. - IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); + IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); ++OpsNarrowed; - return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, + return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr, St->getPointerInfo().getWithOffset(StOffset), false, false, NewAlign).getNode(); } @@ -7684,17 +8356,18 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy)) return SDValue(); - SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), + SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), Ptr.getValueType(), Ptr, DAG.getConstant(PtrOff, Ptr.getValueType())); - SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), + SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), NewAlign); - SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, + LD->isInvariant(), NewAlign, + LD->getTBAAInfo()); + SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); - SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), + SDValue NewST = DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, ST->getPointerInfo().getWithOffset(PtrOff), false, false, NewAlign); @@ -7747,12 +8420,12 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); - SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), + SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), false, false, false, LDAlign); - SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), + SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(), ST->getPointerInfo(), false, false, STAlign); @@ -7802,17 +8475,28 @@ struct BaseIndexOffset { static BaseIndexOffset match(SDValue Ptr) { bool IsIndexSignExt = false; - // Just Base or possibly anything else. + // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD + // instruction, then it could be just the BASE or everything else we don't + // know how to handle. Just use Ptr as BASE and give up. if (Ptr->getOpcode() != ISD::ADD) return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - // Base + offset. + // We know that we have at least an ADD instruction. Try to pattern match + // the simple case of BASE + OFFSET. if (isa<ConstantSDNode>(Ptr->getOperand(1))) { int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, IsIndexSignExt); } + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Ptr is the actual BASE pointer. + // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + // Look at Base + Index + Offset cases. SDValue Base = Ptr->getOperand(0); SDValue IndexOffset = Ptr->getOperand(1); @@ -7963,6 +8647,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { Index = STn; break; } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { + if (Ldn->isVolatile()) { + Index = NULL; + break; + } + // Save the load node for later. Continue the scan. AliasLoadNodes.push_back(Ldn); NextInChain = Ldn->getChain().getNode(); @@ -8080,7 +8769,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // The earliest Node in the DAG. LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; - DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc(); + SDLoc DL(StoreNodes[0].MemNode); SDValue StoredVal; if (UseVector) { @@ -8276,8 +8965,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); } - DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc(); - DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc(); + SDLoc LoadDL(LoadNodes[0].MemNode); + SDLoc StoreDL(StoreNodes[0].MemNode); LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, @@ -8338,9 +9027,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) - return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), + return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), OrigAlign); + ST->isNonTemporal(), OrigAlign, + ST->getTBAAInfo()); } // Turn 'store undef, Ptr' -> nothing. @@ -8355,7 +9045,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // transform should not be done in this case. if (Value.getOpcode() != ISD::TargetConstantFP) { SDValue Tmp; - switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { + switch (CFP->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unknown FP type"); case MVT::f16: // We don't do this for these yet. case MVT::f80: @@ -8367,9 +9057,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue(), MVT::i32); - return DAG.getStore(Chain, N->getDebugLoc(), Tmp, - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + return DAG.getStore(Chain, SDLoc(N), Tmp, + Ptr, ST->getMemOperand()); } break; case MVT::f64: @@ -8378,9 +9067,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). getZExtValue(), MVT::i64); - return DAG.getStore(Chain, N->getDebugLoc(), Tmp, - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + return DAG.getStore(Chain, SDLoc(N), Tmp, + Ptr, ST->getMemOperand()); } if (!ST->isVolatile() && @@ -8396,19 +9084,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); - SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, + SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, - ST->getAlignment()); - Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, + ST->getAlignment(), TBAAInfo); + Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); - SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, + SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, - Alignment); - return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + Alignment, TBAAInfo); + return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, St0, St1); } @@ -8421,9 +9110,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) - return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, + return DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), Align); + ST->isVolatile(), ST->isNonTemporal(), Align, + ST->getTBAAInfo()); } } @@ -8433,7 +9123,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (NewST.getNode()) return NewST; - if (CombinerAA) { + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -8443,19 +9135,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { - ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, - ST->getPointerInfo(), - ST->getMemoryVT(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr, + ST->getMemoryVT(), ST->getMemOperand()); } else { - ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, - ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr, + ST->getMemOperand()); } // Create token to keep both nodes around. - SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplStore); // Make sure the new and old chains are cleaned up. @@ -8483,10 +9171,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->getMemoryVT().getScalarType().getSizeInBits())); AddToWorkList(Value.getNode()); if (Shorter.getNode()) - return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + return DAG.getTruncStore(Chain, SDLoc(N), Shorter, + Ptr, ST->getMemoryVT(), ST->getMemOperand()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. @@ -8516,10 +9202,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { && Value.getNode()->hasOneUse() && ST->isUnindexed() && TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), ST->getMemoryVT())) { - return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), + Ptr, ST->getMemoryVT(), ST->getMemOperand()); } // Only perform this optimization before the types are legal, because we @@ -8547,7 +9231,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); SDValue EltNo = N->getOperand(2); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If the inserted element is an UNDEF, just use the input vector. if (InVal.getOpcode() == ISD::UNDEF) @@ -8568,7 +9252,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. SmallVector<SDValue, 8> Ops; - if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + // Do not combine these two vectors if the output vector will not replace + // the input vector. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { Ops.append(InVec.getNode()->op_begin(), InVec.getNode()->op_end()); } else if (InVec.getOpcode() == ISD::UNDEF) { @@ -8608,7 +9294,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue InOp = InVec.getOperand(0); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); - return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); + return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); } return InOp; } @@ -8641,8 +9327,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { OrigElt -= NumElem; } - EVT IndexTy = N->getOperand(1).getValueType(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, + EVT IndexTy = TLI.getVectorIdxTy(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, InVec, DAG.getConstant(OrigElt, IndexTy)); } @@ -8756,7 +9442,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { EVT PtrType = NewPtr.getValueType(); if (TLI.isBigEndian()) PtrOff = VT.getSizeInBits() / 8 - PtrOff; - NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, + NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr, DAG.getConstant(PtrOff, PtrType)); } @@ -8773,20 +9459,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // extending load instead. ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), + Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + LVT, LN0->isVolatile(), LN0->isNonTemporal(), + Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); } else { - Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); if (NVT.bitsLT(LVT)) - Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load); + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); else - Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load); + Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); } WorkListRemover DeadNodes(*this); SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; @@ -8816,7 +9503,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return SDValue(); unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of values @@ -8918,7 +9605,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT SrcVT = MVT::Other; unsigned Opcode = ISD::DELETED_NODE; @@ -8983,7 +9670,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); // A vector built entirely of undefs is undef. @@ -9119,8 +9806,35 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return N->getOperand(0); // Check if all of the operands are undefs. + EVT VT = N->getValueType(0); if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(N->getValueType(0)); + return DAG.getUNDEF(VT); + + // Optimize concat_vectors where one of the vectors is undef. + if (N->getNumOperands() == 2 && + N->getOperand(1)->getOpcode() == ISD::UNDEF) { + SDValue In = N->getOperand(0); + assert(In.getValueType().isVector() && "Must concat vectors"); + + // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). + if (In->getOpcode() == ISD::BITCAST && + !In->getOperand(0)->getValueType(0).isVector()) { + SDValue Scalar = In->getOperand(0); + EVT SclTy = Scalar->getValueType(0); + + if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) + return SDValue(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, + VT.getSizeInBits() / SclTy.getSizeInBits()); + if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) + return SDValue(); + + SDLoc dl = SDLoc(N); + SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); + return DAG.getNode(ISD::BITCAST, dl, VT, Res); + } + } // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. @@ -9158,7 +9872,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // The extract index must be constant. if (!CS) return SDValue(); - + // Check that we are reading from the identity index. if (CS->getZExtValue() != IdentityIndex) return SDValue(); @@ -9166,7 +9880,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SingleSource.getNode()) return SingleSource; - + return SDValue(); } @@ -9179,7 +9893,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // (extract_subvec (concat V1, V2, ...), i) // Into: // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same type. + // Only operand 0 is checked as 'concat' assumes all inputs of the same + // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); @@ -9194,7 +9909,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { V = V.getOperand(0); if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Handle only simple case where vector being inserted and vector // being extracted are of same type, and are half size of larger vectors. EVT BigVT = V->getOperand(0).getValueType(); @@ -9246,22 +9961,36 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { for (unsigned I = 0; I != NumConcats; ++I) { // Make sure we're dealing with a copy. unsigned Begin = I * NumElemsPerConcat; - if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) - return SDValue(); + bool AllUndef = true, NoUndef = true; + for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { + if (SVN->getMaskElt(J) >= 0) + AllUndef = false; + else + NoUndef = false; + } - for (unsigned J = 1; J != NumElemsPerConcat; ++J) { - if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + if (NoUndef) { + if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) return SDValue(); - } - unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; - if (FirstElt < N0.getNumOperands()) - Ops.push_back(N0.getOperand(FirstElt)); - else - Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + for (unsigned J = 1; J != NumElemsPerConcat; ++J) + if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + return SDValue(); + + unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; + if (FirstElt < N0.getNumOperands()) + Ops.push_back(N0.getOperand(FirstElt)); + else + Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + + } else if (AllUndef) { + Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); + } else { // Mixed with general masks and undefs, can't do optimization. + return SDValue(); + } } - return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(), + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(), Ops.size()); } @@ -9288,7 +10017,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (Idx >= (int)NumElts) Idx -= NumElts; NewMask.push_back(Idx); } - return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT), + return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), &NewMask[0]); } @@ -9298,14 +10027,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); if (Idx >= 0) { - if (Idx < (int)NumElts) - Idx += NumElts; - else + if (Idx >= (int)NumElts) Idx -= NumElts; + else + Idx = -1; // remove reference to lhs } NewMask.push_back(Idx); } - return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), + return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT), &NewMask[0]); } @@ -9322,7 +10051,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { NewMask.push_back(Idx); } if (Changed) - return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]); } // If it is a splat, check if the argument vector is another splat or a @@ -9419,7 +10148,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (N->getOpcode() == ISD::AND) { @@ -9450,7 +10179,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT EltVT = RVT.getVectorElementType(); SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, &ZeroOps[0], ZeroOps.size()); LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); @@ -9506,13 +10235,13 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // legalization, the types may not match between the two BUILD_VECTORS. // Truncate one of the operands to make them match. if (RVT.getSizeInBits() > VT.getSizeInBits()) { - RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp); + RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp); } else { - LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp); + LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp); VT = RVT; } } - SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, + SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, LHSOp, RHSOp); if (FoldOp.getOpcode() != ISD::UNDEF && FoldOp.getOpcode() != ISD::Constant && @@ -9523,7 +10252,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } if (Ops.size() == LHS.getNumOperands()) - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), &Ops[0], Ops.size()); } @@ -9548,7 +10277,7 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { Op.getOpcode() != ISD::ConstantFP) break; EVT EltVT = Op.getValueType(); - SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op); + SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op); if (FoldOp.getOpcode() != ISD::UNDEF && FoldOp.getOpcode() != ISD::ConstantFP) break; @@ -9559,11 +10288,11 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { if (Ops.size() != N0.getNumOperands()) return SDValue(); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), &Ops[0], Ops.size()); } -SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, +SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2){ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); @@ -9577,13 +10306,13 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, // Check to see if we got a select_cc back (to turn into setcc/select). // Otherwise, just return whatever node we got back, like fabs. if (SCC.getOpcode() == ISD::SELECT_CC) { - SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(), + SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), N0.getValueType(), SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4)); AddToWorkList(SETCC.getNode()); - return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(), - SCC.getOperand(2), SCC.getOperand(3), SETCC); + return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), + SCC.getOperand(2), SCC.getOperand(3), SETCC); } return SCC; @@ -9652,10 +10381,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) return false; - Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), LLD->getBasePtr(), - RLD->getBasePtr()); + Addr = DAG.getSelect(SDLoc(TheSelect), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); } else { // Otherwise SELECT_CC SDNode *CondLHS = TheSelect->getOperand(0).getNode(); SDNode *CondRHS = TheSelect->getOperand(1).getNode(); @@ -9666,7 +10395,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) return false; - Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), TheSelect->getOperand(1), @@ -9677,17 +10406,17 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue Load; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { Load = DAG.getLoad(TheSelect->getValueType(0), - TheSelect->getDebugLoc(), - // FIXME: Discards pointer info. + SDLoc(TheSelect), + // FIXME: Discards pointer and TBAA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->isInvariant(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), - TheSelect->getDebugLoc(), + SDLoc(TheSelect), TheSelect->getValueType(0), - // FIXME: Discards pointer info. + // FIXME: Discards pointer and TBAA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->getMemoryVT(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->getAlignment()); @@ -9708,7 +10437,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, /// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. -SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, +SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare) { // (x ? y : y) -> y. @@ -9720,7 +10449,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); // Determine if the condition we're dealing with is constant - SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), + SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, DL, false); if (SCC.getNode()) AddToWorkList(SCC.getNode()); ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode()); @@ -9786,13 +10515,13 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue One = DAG.getIntPtrConstant(EltSize); SDValue Cond = DAG.getSetCC(DL, - TLI.getSetCCResultType(N0.getValueType()), + getSetCCResultType(N0.getValueType()), N0, N1, CC); AddToWorkList(Cond.getNode()); - SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), - Cond, One, Zero); + SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), + Cond, One, Zero); AddToWorkList(CstOffset.getNode()); - CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, + CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorkList(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, @@ -9817,7 +10546,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, ShCtV = XType.getSizeInBits()-ShCtV-1; SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy(N0.getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), + SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, ShCt); AddToWorkList(Shift.getNode()); @@ -9829,7 +10558,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return DAG.getNode(ISD::AND, DL, AType, Shift, N2); } - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), + SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); @@ -9862,14 +10591,14 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue ShlAmt = DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy(AndLHS.getValueType())); - SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is either // all-ones, or zero. SDValue ShrAmt = DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy(Shl.getValueType())); - SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt); + SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); return DAG.getNode(ISD::AND, DL, VT, Shr, N3); } @@ -9889,21 +10618,21 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // NOTE: Don't create a SETCC if it's not legal on this target. if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, - LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) { + LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) { SDValue Temp, SCC; // cast from setcc result type to select result type if (LegalTypes) { - SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), + SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); if (N2.getValueType().bitsLT(SCC.getValueType())) - Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), + Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), N2.getValueType()); else - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC); } else { - SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC); } @@ -9914,9 +10643,10 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return Temp; // shl setcc result by log2 n2c - return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy(Temp.getValueType()))); + return DAG.getNode( + ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy(Temp.getValueType()))); } } @@ -9926,8 +10656,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { EVT XType = N0.getValueType(); if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { - SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); + TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { + SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC); if (Res.getValueType() != VT) Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); return Res; @@ -9937,16 +10667,16 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, XType))) { - SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); + SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); return DAG.getNode(ISD::SRL, DL, XType, Ctlz, DAG.getConstant(Log2_32(XType.getSizeInBits()), getShiftAmountTy(Ctlz.getValueType()))); } // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { - SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(), + SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0), XType, DAG.getConstant(0, XType), N0); - SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType); + SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType); return DAG.getNode(ISD::SRL, DL, XType, DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), DAG.getConstant(XType.getSizeInBits()-1, @@ -9954,7 +10684,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { - SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, + SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); @@ -9980,11 +10710,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, EVT XType = N0.getValueType(); if (SubC && SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, + SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); - SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), + SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), XType, N0, Shift); AddToWorkList(Shift.getNode()); AddToWorkList(Add.getNode()); @@ -9998,7 +10728,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, - DebugLoc DL, bool foldBooleans) { + SDLoc DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); @@ -10072,17 +10802,20 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, /// isAlias - Return true if there is any possibility that the two addresses /// overlap. -bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, +bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, + SDValue Ptr2, int64_t Size2, bool IsVolatile2, const Value *SrcValue2, int SrcValueOffset2, unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const { // If they are the same then they must be aliases. if (Ptr1 == Ptr2) return true; + // If they are both volatile then they cannot be reordered. + if (IsVolatile1 && IsVolatile2) return true; + // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; @@ -10127,7 +10860,9 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, return false; } - if (CombinerGlobalAA) { + bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA && SrcValue1 && SrcValue2) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; @@ -10146,24 +10881,25 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) { SDValue Ptr0, Ptr1; int64_t Size0, Size1; + bool IsVolatile0, IsVolatile1; const Value *SrcValue0, *SrcValue1; int SrcValueOffset0, SrcValueOffset1; unsigned SrcValueAlign0, SrcValueAlign1; const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1; - FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0, + FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, SrcValueAlign0, SrcTBAAInfo0); - FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1, + FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, SrcValueAlign1, SrcTBAAInfo1); - return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0, + return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, SrcValueAlign0, SrcTBAAInfo0, - Ptr1, Size1, SrcValue1, SrcValueOffset1, + Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, SrcValueAlign1, SrcTBAAInfo1); } /// FindAliasInfo - Extracts the relevant alias information from the memory -/// node. Returns true if the operand was a load. +/// node. Returns true if the operand was a nonvolatile load. bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, + SDValue &Ptr, int64_t &Size, bool &IsVolatile, const Value *&SrcValue, int &SrcValueOffset, unsigned &SrcValueAlign, @@ -10172,29 +10908,31 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, Ptr = LS->getBasePtr(); Size = LS->getMemoryVT().getSizeInBits() >> 3; + IsVolatile = LS->isVolatile(); SrcValue = LS->getSrcValue(); SrcValueOffset = LS->getSrcValueOffset(); SrcValueAlign = LS->getOriginalAlignment(); TBAAInfo = LS->getTBAAInfo(); - return isa<LoadSDNode>(LS); + return isa<LoadSDNode>(LS) && !IsVolatile; } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, - SmallVector<SDValue, 8> &Aliases) { + SmallVectorImpl<SDValue> &Aliases) { SmallVector<SDValue, 8> Chains; // List of chains to visit. SmallPtrSet<SDNode *, 16> Visited; // Visited node set. // Get alias information for node. SDValue Ptr; int64_t Size; + bool IsVolatile; const Value *SrcValue; int SrcValueOffset; unsigned SrcValueAlign; const MDNode *SrcTBAAInfo; - bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, - SrcValueAlign, SrcTBAAInfo); + bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue, + SrcValueOffset, SrcValueAlign, SrcTBAAInfo); // Starting off. Chains.push_back(OriginalChain); @@ -10235,20 +10973,21 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // Get alias information for Chain. SDValue OpPtr; int64_t OpSize; + bool OpIsVolatile; const Value *OpSrcValue; int OpSrcValueOffset; unsigned OpSrcValueAlign; const MDNode *OpSrcTBAAInfo; bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpSrcValue, OpSrcValueOffset, + OpIsVolatile, OpSrcValue, OpSrcValueOffset, OpSrcValueAlign, OpSrcTBAAInfo); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, - SrcTBAAInfo, - OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, + isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset, + SrcValueAlign, SrcTBAAInfo, + OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset, OpSrcValueAlign, OpSrcTBAAInfo)) { Aliases.push_back(Chain); } else { @@ -10298,7 +11037,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return Aliases[0]; // Construct a custom tailored token factor. - return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, &Aliases[0], Aliases.size()); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index e096a23..a6f7461 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -41,6 +41,7 @@ #define DEBUG_TYPE "isel" #include "llvm/CodeGen/FastISel.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Analysis.h" @@ -89,18 +90,16 @@ bool FastISel::LowerArguments() { // Fallback to SDISel argument lowering code to deal with sret pointer // parameter. return false; - + if (!FastLowerArguments()) return false; - // Enter non-dead arguments into ValueMap for uses in non-entry BBs. + // Enter arguments into ValueMap for uses in non-entry BBs. for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), E = FuncInfo.Fn->arg_end(); I != E; ++I) { - if (!I->use_empty()) { - DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); - assert(VI != LocalValueMap.end() && "Missed an argument?"); - FuncInfo.ValueMap[I] = VI->second; - } + DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); + assert(VI != LocalValueMap.end() && "Missed an argument?"); + FuncInfo.ValueMap[I] = VI->second; } return true; } @@ -598,7 +597,10 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); - if (!DIVariable(DI->getVariable()).Verify() || + DIVariable DIVar(DI->getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; @@ -610,16 +612,16 @@ bool FastISel::SelectCall(const User *I) { return true; } - unsigned Reg = 0; unsigned Offset = 0; - if (const Argument *Arg = dyn_cast<Argument>(Address)) { + Optional<MachineOperand> Op; + if (const Argument *Arg = dyn_cast<Argument>(Address)) // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); - if (Offset) - Reg = TRI.getFrameRegister(*FuncInfo.MF); - } - if (!Reg) - Reg = lookUpRegForValue(Address); + if (Offset) + Op = MachineOperand::CreateFI(Offset); + if (!Op) + if (unsigned Reg = lookUpRegForValue(Address)) + Op = MachineOperand::CreateReg(Reg, false); // If we have a VLA that has a "use" in a metadata node that's then used // here but it has no other uses, then we have a problem. E.g., @@ -632,20 +634,29 @@ bool FastISel::SelectCall(const User *I) { // If we assign 'a' a vreg and fast isel later on has to use the selection // DAG isel, it will want to copy the value to the vreg. However, there are // no uses, which goes counter to what selection DAG isel expects. - if (!Reg && !Address->use_empty() && isa<Instruction>(Address) && + if (!Op && !Address->use_empty() && isa<Instruction>(Address) && (!isa<AllocaInst>(Address) || !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) - Reg = FuncInfo.InitializeRegForValue(Address); - - if (Reg) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(Reg, RegState::Debug).addImm(Offset) - .addMetadata(DI->getVariable()); - else + Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address), + false); + + if (Op) { + if (Op->isReg()) { + Op->setIsDebug(true); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, + DI->getVariable()); + } else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op) + .addImm(0) + .addMetadata(DI->getVariable()); + } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + } return true; } case Intrinsic::dbg_value: { @@ -673,13 +684,14 @@ bool FastISel::SelectCall(const User *I) { .addFPImm(CF).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addReg(Reg, RegState::Debug).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + // FIXME: This does not handle register-indirect values at offset 0. + bool IsIndirect = DI->getOffset() != 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, + Reg, DI->getOffset(), DI->getVariable()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); } return true; } @@ -1559,4 +1571,19 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI); } +bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { + // Must be an add. + if (!isa<AddOperator>(Add)) + return false; + // Type size needs to match. + if (TD.getTypeSizeInBits(GEP->getType()) != + TD.getTypeSizeInBits(Add->getType())) + return false; + // Must be in the same basic block. + if (isa<Instruction>(Add) && + FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB) + return false; + // Must have a constant operand. + return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1)); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b46edad..4309dc1 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -55,21 +55,19 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { return false; } -FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) - : TLI(tli) { -} - void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { + const TargetLowering *TLI = TM.getTargetLowering(); + Fn = &fn; MF = &mf; RegInfo = &MF->getRegInfo(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, TLI); - CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), - Outs, Fn->getContext()); + GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI); + CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, + Fn->isVarArg(), + Outs, Fn->getContext()); // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines @@ -79,9 +77,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { Type *Ty = AI->getAllocatedType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. @@ -114,8 +112,11 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // in a predictable order. if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { MachineModuleInfo &MMI = MF->getMMI(); + DIVariable DIVar(DI->getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); if (MMI.hasDebugInfo() && - DIVariable(DI->getVariable()).Verify() && + DIVar && !DI->getDebugLoc().isUnknown()) { // Don't handle byval struct arguments or VLAs, for example. // Non-byval arguments are handled here (they refer to the stack @@ -167,10 +168,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { assert(PHIReg && "PHI node does not have an assigned virtual register!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, PN->getType(), ValueVTs); + ComputeValueVTs(*TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT); + unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i); @@ -208,7 +209,8 @@ void FunctionLoweringInfo::clear() { /// CreateReg - Allocate a single virtual register for the given type. unsigned FunctionLoweringInfo::CreateReg(MVT VT) { - return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); + return RegInfo-> + createVirtualRegister(TM.getTargetLowering()->getRegClassFor(VT)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -219,15 +221,17 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) { /// will assign registers for each member or element. /// unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { + const TargetLowering *TLI = TM.getTargetLowering(); + SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, Ty, ValueVTs); + ComputeValueVTs(*TLI, Ty, ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; - MVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); + MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT); - unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT); + unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { unsigned R = CreateReg(RegisterVT); if (!FirstReg) FirstReg = R; @@ -266,15 +270,17 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { if (!Ty->isIntegerTy() || Ty->isVectorTy()) return; + const TargetLowering *TLI = TM.getTargetLowering(); + SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(TLI, Ty, ValueVTs); + ComputeValueVTs(*TLI, Ty, ValueVTs); assert(ValueVTs.size() == 1 && "PHIs with non-vector integer types should have a single VT."); EVT IntVT = ValueVTs[0]; - if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1) + if (TLI->getNumRegisters(PN->getContext(), IntVT) != 1) return; - IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT); + IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT); unsigned BitWidth = IntVT.getSizeInBits(); unsigned DestReg = ValueMap[PN]; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 3b1abd7..3a8fb85 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -211,6 +212,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + unsigned NumResults = CountResults(Node); for (unsigned i = 0; i < II.getNumDefs(); ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination @@ -218,6 +220,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, unsigned VRBase = 0; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); + // If the register class is unknown for the given definition, then try to + // infer one from the value type. + if (!RC && i < NumResults) + RC = TLI->getRegClassFor(Node->getSimpleValueType(i)); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -639,8 +645,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. - unsigned FrameIx = SD->getFrameIx(); - return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); + return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SD->getFrameIx()).addImm(Offset).addMetadata(MDPtr); } // Otherwise, we're going to create an instruction here. const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); @@ -678,7 +684,13 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U); } - MIB.addImm(Offset).addMetadata(MDPtr); + if (Offset != 0) // Indirect addressing. + MIB.addImm(Offset); + else + MIB.addReg(0U, RegState::Debug); + + MIB.addMetadata(MDPtr); + return &*MIB; } @@ -716,10 +728,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); + unsigned NumDefs = II.getNumDefs(); + const uint16_t *ScratchRegs = NULL; + + // Handle PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::PATCHPOINT) { + unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); + NumDefs = NumResults; + ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); + } + unsigned NumImpUses = 0; unsigned NodeOperands = - countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); - bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; + countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -742,14 +764,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. - bool HasOptPRefs = II.getNumDefs() > NumResults; + bool HasOptPRefs = NumDefs > NumResults; assert((!HasOptPRefs || !HasPhysRegOuts) && "Unable to cope with optional defs and phys regs defs!"); - unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; + unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) - AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + // Add scratch registers as implicit def and early clobber + if (ScratchRegs) + for (unsigned i = 0; ScratchRegs[i]; ++i) + MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine | + RegState::EarlyClobber); + // Transfer all of the memory reference descriptions of this instruction. MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), cast<MachineSDNode>(Node)->memoperands_end()); @@ -778,8 +806,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Additional results must be physical register defs. if (HasPhysRegOuts) { - for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + for (unsigned i = NumDefs; i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index a9c2203..920dda8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -1,4 +1,4 @@ -//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==// +//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2a1d8c2..9061ae9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -58,6 +58,10 @@ class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { /// LegalizedNodes - The set of nodes which have already been legalized. SmallPtrSet<SDNode *, 16> LegalizedNodes; + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(*DAG.getContext(), VT); + } + // Libcall insertion helpers. public: @@ -79,24 +83,24 @@ private: /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, - SDValue Idx, DebugLoc dl); + SDValue Idx, SDLoc dl); SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, - SDValue Idx, DebugLoc dl); + SDValue Idx, SDLoc dl); /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> - SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, + SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, SDValue N1, SDValue N2, ArrayRef<int> Mask) const; - void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - DebugLoc dl); + bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + bool &NeedInvert, SDLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, - unsigned NumOps, bool isSigned, DebugLoc dl); + unsigned NumOps, bool isSigned, SDLoc dl); std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); @@ -113,21 +117,21 @@ private: void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); - SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, SDLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue ExpandFCOPYSIGN(SDNode *Node); SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, - DebugLoc dl); + SDLoc dl); SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, - DebugLoc dl); + SDLoc dl); SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, - DebugLoc dl); + SDLoc dl); - SDValue ExpandBSWAP(SDValue Op, DebugLoc dl); - SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl); + SDValue ExpandBSWAP(SDValue Op, SDLoc dl); + SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandInsertToVectorThroughStack(SDValue Op); @@ -181,7 +185,7 @@ public: /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue -SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, +SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, SDValue N1, SDValue N2, ArrayRef<int> Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); @@ -247,7 +251,7 @@ void SelectionDAGLegalize::LegalizeDAG() { SDValue SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { bool Extend = false; - DebugLoc dl = CFP->getDebugLoc(); + SDLoc dl(CFP); // If a FP immediate is precise when represented as a float and if the // target can do an extending load from float to double, we put it into @@ -307,7 +311,9 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); - DebugLoc dl = ST->getDebugLoc(); + unsigned AS = ST->getAddressSpace(); + + SDLoc dl(ST); if (ST->getMemoryVT().isFloatingPoint() || ST->getMemoryVT().isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); @@ -339,7 +345,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, MachinePointerInfo(), StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy(AS)); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -377,7 +383,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, .getWithOffset(Offset), MemVT, ST->isVolatile(), ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset))); + MinAlign(ST->getAlignment(), Offset), + ST->getTBAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -404,13 +411,14 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, ST->getPointerInfo(), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, TLI.getPointerTy(AS))); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), - Alignment); + Alignment, ST->getTBAAInfo()); SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -428,16 +436,14 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); EVT LoadedVT = LD->getMemoryVT(); - DebugLoc dl = LD->getDebugLoc(); + SDLoc dl(LD); if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. - SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), - LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, + LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (LoadedVT != VT) Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : @@ -470,7 +476,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset)); + MinAlign(LD->getAlignment(), Offset), + LD->getTBAAInfo()); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, MachinePointerInfo(), false, false, 0)); @@ -488,7 +495,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->isVolatile(), LD->isNonTemporal(), - MinAlign(LD->getAlignment(), Offset)); + MinAlign(LD->getAlignment(), Offset), + LD->getTBAAInfo()); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. @@ -532,23 +540,25 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment); + LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), + LD->getTBAAInfo()); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment); + LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), + LD->getTBAAInfo()); } // aggregate the two parts @@ -570,7 +580,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, /// the insert there, and then read the result back. SDValue SelectionDAGLegalize:: PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, - DebugLoc dl) { + SDLoc dl) { SDValue Tmp1 = Vec; SDValue Tmp2 = Val; SDValue Tmp3 = Idx; @@ -606,13 +616,13 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, + MachinePointerInfo::getFixedStack(SPFI), false, false, false, 0); } SDValue SelectionDAGLegalize:: -ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) { +ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl) { if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) { // SCALAR_TO_VECTOR requires that the type of the value being inserted // match the element type of the vector being created, except for @@ -651,7 +661,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - DebugLoc dl = ST->getDebugLoc(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); + SDLoc dl(ST); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && TLI.isTypeLegal(MVT::i32)) { @@ -659,7 +670,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { bitcastToAPInt().zextOrTrunc(32), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -668,7 +679,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -681,12 +692,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(4)); + DAG.getConstant(4, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); + isVolatile, isNonTemporal, MinAlign(Alignment, 4U), + TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -699,11 +711,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { StoreSDNode *ST = cast<StoreSDNode>(Node); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { @@ -741,7 +754,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -763,7 +776,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + NVT, isVolatile, isNonTemporal, Alignment, + TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -784,19 +798,20 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), RoundVT, - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, + TBAAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); } else { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -805,16 +820,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, isVolatile, isNonTemporal, Alignment); + RoundVT, isVolatile, isNonTemporal, Alignment, + TBAAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); } // The order of the stores doesn't matter. @@ -850,7 +866,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -863,7 +879,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { SDValue Chain = LD->getChain(); // The chain. SDValue Ptr = LD->getBasePtr(); // The base pointer. SDValue Value; // The value returned by the load op. - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { @@ -898,9 +914,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(NVT.getSizeInBits() == VT.getSizeInBits() && "Can only promote loads to same size type"); - SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand()); RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); RChain = Res.getValue(1); break; @@ -920,6 +934,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually @@ -946,7 +961,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); Ch = Result.getValue(1); // The chain. @@ -983,16 +998,16 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1012,17 +1027,17 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1075,9 +1090,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { case TargetLowering::Expand: if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, - LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + LD->getMemOperand()); unsigned ExtendOp; switch (ExtType) { case ISD::EXTLOAD: @@ -1105,9 +1118,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Chain, Ptr, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + Chain, Ptr, SrcVT, + LD->getMemOperand()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1249,7 +1261,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Expand) { // replace ISD::DEBUGTRAP with ISD::TRAP SDValue NewVal; - NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(), + NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(), Node->getOperand(0)); ReplaceNode(Node, NewVal.getNode()); LegalizeOp(NewVal.getNode()); @@ -1370,7 +1382,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Store the value to a temporary stack slot, then LOAD the returned part. SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, @@ -1382,11 +1394,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, Idx.getValueType())); - if (Idx.getValueType().bitsGT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); - else - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); - + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); if (Op.getValueType().isVector()) @@ -1404,7 +1412,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue Vec = Op.getOperand(0); SDValue Part = Op.getOperand(1); SDValue Idx = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Store the value to a temporary stack slot, then LOAD the returned part. @@ -1424,11 +1432,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, Idx.getValueType())); - - if (Idx.getValueType().bitsGT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); - else - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); @@ -1449,7 +1453,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // Create the stack frame object. EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); @@ -1489,12 +1493,12 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); SDValue Tmp1 = Node->getOperand(0); SDValue Tmp2 = Node->getOperand(1); @@ -1527,7 +1531,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), - LoadPtr, DAG.getIntPtrConstant(ByteOffset)); + LoadPtr, + DAG.getConstant(ByteOffset, LoadPtr.getValueType())); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1542,16 +1547,16 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { } } // Now get the sign bit proper, by seeing whether the value is negative. - SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()), + SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()), SignBit, DAG.getConstant(0, SignBit.getValueType()), ISD::SETLT); // Get the absolute value of the result. SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); // Select between the nabs and abs value based on the sign bit of // the input. - return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1559,7 +1564,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" " not tell us which reg is the stack pointer!"); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); EVT VT = Node->getValueType(0); SDValue Tmp1 = SDValue(Node, 0); SDValue Tmp2 = SDValue(Node, 1); @@ -1568,52 +1573,76 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true), + SDLoc(Node)); SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); - if (Align > StackAlign) - SP = DAG.getNode(ISD::AND, dl, VT, SP, - DAG.getConstant(-(uint64_t)Align, VT)); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + if (Align > StackAlign) + Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, + DAG.getConstant(-(uint64_t)Align, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), SDValue()); + DAG.getIntPtrConstant(0, true), SDValue(), + SDLoc(Node)); Results.push_back(Tmp1); Results.push_back(Tmp2); } /// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and -/// condition code CC on the current target. This routine expands SETCC with -/// illegal condition code into AND / OR of multiple SETCC values. -void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, +/// condition code CC on the current target. +/// +/// If the SETCC has been legalized using AND / OR, then the legalized node +/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert +/// will be set to false. +/// +/// If the SETCC has been legalized by using getSetCCSwappedOperands(), +/// then the values of LHS and RHS will be swapped, CC will be set to the +/// new condition, and NeedInvert will be set to false. +/// +/// If the SETCC has been legalized using the inverse condcode, then LHS and +/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert +/// will be set to true. The caller must invert the result of the SETCC with +/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a +/// true/false result. +/// +/// \returns true if the SetCC has been legalized, false if it hasn't. +bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - DebugLoc dl) { + bool &NeedInvert, + SDLoc dl) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + NeedInvert = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; case TargetLowering::Expand: { + ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + std::swap(LHS, RHS); + CC = DAG.getCondCode(InvCC); + return true; + } ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; - ISD::CondCode InvCC = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); - case ISD::SETO: + case ISD::SETO: assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) == TargetLowering::Legal && "If SETO is expanded, SETOEQ must be legal!"); CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; - case ISD::SETUO: + case ISD::SETUO: assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) == TargetLowering::Legal && "If SETUO is expanded, SETUNE must be legal!"); @@ -1623,12 +1652,12 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, case ISD::SETOGE: case ISD::SETOLT: case ISD::SETOLE: - case ISD::SETONE: - case ISD::SETUEQ: - case ISD::SETUNE: - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETULT: + case ISD::SETONE: + case ISD::SETUEQ: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: case ISD::SETULE: // If we are floating point, assign and break, otherwise fall through. if (!OpVT.isInteger()) { @@ -1644,20 +1673,23 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, case ISD::SETGT: case ISD::SETGE: case ISD::SETLT: + // We only support using the inverted operation, which is computed above + // and not a different manner of supporting expanding these cases. + llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETNE: case ISD::SETEQ: - InvCC = ISD::getSetCCSwappedOperands(CCCode); - if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) { - // We only support using the inverted operation and not a - // different manner of supporting expanding these cases. - llvm_unreachable("Don't know how to expand this condition!"); + // Try inverting the result of the inverse condition. + InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + CC = DAG.getCondCode(InvCC); + NeedInvert = true; + return true; } - LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC); - RHS = SDValue(); - CC = SDValue(); - return; + // If inverting the condition didn't work then we have no means to expand + // the condition. + llvm_unreachable("Don't know how to expand this condition!"); } - + SDValue SetCC1, SetCC2; if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, @@ -1672,9 +1704,10 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); - break; + return true; } } + return false; } /// EmitStackConvert - Emit a store/load combination to the stack. This stores @@ -1684,7 +1717,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, - DebugLoc dl) { + SDLoc dl) { // Create the stack frame object. unsigned SrcAlign = TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType(). @@ -1725,7 +1758,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); // Create a vector sized/aligned stack slot, store the value to element #0, // then load the whole vector back out. SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0)); @@ -1749,7 +1782,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned NumElems = Node->getNumOperands(); SDValue Value1, Value2; - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); EVT VT = Node->getValueType(0); EVT OpVT = Node->getOperand(0).getValueType(); EVT EltVT = VT.getVectorElementType(); @@ -1881,7 +1914,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Node->getDebugLoc()); + Callee, Args, DAG, SDLoc(Node)); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -1896,7 +1929,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, /// and returning a result of type RetVT. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, - bool isSigned, DebugLoc dl) { + bool isSigned, SDLoc dl) { TargetLowering::ArgListTy Args; Args.reserve(NumOps); @@ -1950,7 +1983,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Node->getDebugLoc()); + Callee, Args, DAG, SDLoc(Node)); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo; @@ -1963,7 +1996,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; @@ -1981,7 +2014,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; @@ -1996,7 +2029,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; @@ -2043,7 +2076,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned = Opcode == ISD::SDIVREM; RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; @@ -2082,7 +2115,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); TargetLowering:: CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, @@ -2100,7 +2133,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, /// isSinCosLibcallAvailable - Return true if sincos libcall is available. static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; @@ -2130,7 +2163,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN ? ISD::FCOS : ISD::FSIN; - + SDValue Op0 = Node->getOperand(0); for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), UE = Op0.getNode()->use_end(); UI != UE; ++UI) { @@ -2150,7 +2183,7 @@ void SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; @@ -2158,25 +2191,25 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, case MVT::f128: LC = RTLIB::SINCOS_F128; break; case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; } - + // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. SDValue InChain = DAG.getEntryNode(); - + EVT RetVT = Node->getValueType(0); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - + TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + // Pass the argument. Entry.Node = Node->getOperand(0); Entry.Ty = RetTy; Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + // Pass the return address of sin. SDValue SinPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = SinPtr; @@ -2184,7 +2217,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + // Also pass the return address of the cos. SDValue CosPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = CosPtr; @@ -2192,11 +2225,11 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - - DebugLoc dl = Node->getDebugLoc(); + + SDLoc dl(Node); TargetLowering:: CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, @@ -2218,7 +2251,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, - DebugLoc dl) { + SDLoc dl) { if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2226,11 +2259,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); // word offset constant for Hi/Lo address computation - SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy()); + SDValue WordOff = DAG.getConstant(sizeof(int), StackSlot.getValueType()); // set up Hi and Lo (into buffer) address based on endian SDValue Hi = StackSlot; - SDValue Lo = DAG.getNode(ISD::ADD, dl, - TLI.getPointerTy(), StackSlot, WordOff); + SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), + StackSlot, WordOff); if (TLI.isLittleEndian()) std::swap(Hi, Lo); @@ -2327,9 +2360,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // select. We happen to get lucky and machinesink does the right // thing most of the time. This would be a good candidate for a //pseudo-op, or, even better, for whole-function isel. - SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); - return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast); + return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast); } // Otherwise, implement the fully general conversion. @@ -2340,13 +2373,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, DAG.getConstant(UINT64_C(0x800), MVT::i64)); SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); - SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); - SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); - SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0); + SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), ISD::SETUGE); - SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); + SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, @@ -2365,18 +2398,18 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); - SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), + SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, Op0.getValueType()), ISD::SETLT); SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); - SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), + SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero); // If the sign bit of the integer is set, the large number will be treated // as a negative number. To counteract this, the dynamic code adds an // offset depending on the data type. uint64_t FF; - switch (Op0.getValueType().getSimpleVT().SimpleTy) { + switch (Op0.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) @@ -2389,7 +2422,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset); + CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); Alignment = std::min(Alignment, 4u); SDValue FudgeInReg; if (DestVT == MVT::f32) @@ -2417,7 +2450,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, - DebugLoc dl) { + SDLoc dl) { // First step, figure out the appropriate *INT_TO_FP operation to use. EVT NewInTy = LegalOp.getValueType(); @@ -2459,7 +2492,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, - DebugLoc dl) { + SDLoc dl) { // First step, figure out the appropriate FP_TO*INT operation to use. EVT NewOutTy = DestVT; @@ -2494,7 +2527,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, /// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. /// -SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { +SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; @@ -2542,7 +2575,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { /// ExpandBitCount - Expand the specified bitcount instruction into operations. /// SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, - DebugLoc dl) { + SDLoc dl) { switch (Opc) { default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { @@ -2650,6 +2683,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; } break; case ISD::ATOMIC_CMP_SWAP: @@ -2659,6 +2693,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; } break; case ISD::ATOMIC_LOAD_ADD: @@ -2668,6 +2703,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; } break; case ISD::ATOMIC_LOAD_SUB: @@ -2677,6 +2713,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; } break; case ISD::ATOMIC_LOAD_AND: @@ -2686,6 +2723,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; } break; case ISD::ATOMIC_LOAD_OR: @@ -2695,6 +2733,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; } break; case ISD::ATOMIC_LOAD_XOR: @@ -2704,6 +2743,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; } break; case ISD::ATOMIC_LOAD_NAND: @@ -2713,6 +2753,47 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; + } + break; + case ISD::ATOMIC_LOAD_MAX: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break; + } + break; + case ISD::ATOMIC_LOAD_UMAX: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break; + } + break; + case ISD::ATOMIC_LOAD_MIN: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break; + } + break; + case ISD::ATOMIC_LOAD_UMIN: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break; } break; } @@ -2722,8 +2803,9 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Results; - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; + bool NeedInvert; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: @@ -2913,7 +2995,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, VT); - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), Node->getOperand(0), Tmp1, ISD::SETLT); True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); @@ -2922,7 +3004,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Tmp1)); False = DAG.getNode(ISD::XOR, dl, NVT, False, DAG.getConstant(x, NVT)); - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False); + Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False); Results.push_back(Tmp1); break; } @@ -2934,27 +3016,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned Align = Node->getConstantOperandVal(3); SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, - MachinePointerInfo(V), + MachinePointerInfo(V), false, false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, DAG.getConstant(Align - 1, - TLI.getPointerTy())); + VAList.getValueType())); - VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, + VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, DAG.getConstant(-(int64_t)Align, - TLI.getPointerTy())); + VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg - Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, DAG.getConstant(TLI.getDataLayout()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), - TLI.getPointerTy())); + VAList.getValueType())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, MachinePointerInfo(V), false, false, 0); @@ -3025,7 +3107,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // cast operands to v8i32 and re-build the mask. // Calculate new VT, the size of the new VT should be equal to original. - EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, VT.getSizeInBits()/NewEltVT.getSizeInBits()); assert(NewVT.bitsEq(VT)); @@ -3065,11 +3147,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, - DAG.getIntPtrConstant(Idx))); + DAG.getConstant(Idx, TLI.getVectorIdxTy()))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, - DAG.getIntPtrConstant(Idx - NumElems))); + DAG.getConstant(Idx - NumElems, + TLI.getVectorIdxTy()))); } Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); @@ -3131,10 +3214,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = DAG.getConstantFP(0.0, VT); - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, ISD::SETUGT); Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); - Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3); + Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3); Results.push_back(Tmp1); break; } @@ -3224,6 +3307,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128)); break; + case ISD::FROUND: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); + break; case ISD::FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, @@ -3263,22 +3353,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandConstantFP(CFP, true)); break; } - case ISD::EHSELECTION: { - unsigned Reg = TLI.getExceptionSelectorRegister(); - assert(Reg && "Can't expand to unknown register!"); - Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg, - Node->getValueType(0))); - Results.push_back(Results[0].getValue(1)); - break; - } - case ISD::EXCEPTIONADDR: { - unsigned Reg = TLI.getExceptionPointerRegister(); - assert(Reg && "Can't expand to unknown register!"); - Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg, - Node->getValueType(0))); - Results.push_back(Results[0].getValue(1)); - break; - } case ISD::FSUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && @@ -3528,10 +3602,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy(BottomHalf.getValueType())); Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); - TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1, + TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1, ISD::SETNE); } else { - TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, + TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, DAG.getConstant(0, VT), ISD::SETNE); } Results.push_back(BottomHalf); @@ -3574,9 +3648,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - Index = DAG.getNode(ISD::MUL, dl, PTy, - Index, DAG.getConstant(EntrySize, PTy)); - SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), + Index, DAG.getConstant(EntrySize, Index.getValueType())); + SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), + Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, @@ -3620,10 +3695,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); Tmp3 = Node->getOperand(2); - LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl); + bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, + Tmp3, NeedInvert, dl); + + if (Legalized) { + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SETCC node. + if (Tmp3.getNode()) + Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Tmp3); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0)); - // If we expanded the SETCC into an AND/OR, return the new node - if (Tmp2.getNode() == 0) { Results.push_back(Tmp1); break; } @@ -3654,14 +3740,52 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp4 = Node->getOperand(3); // False SDValue CC = Node->getOperand(4); - LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()), - Tmp1, Tmp2, CC, dl); + bool Legalized = false; + // Try to legalize by inverting the condition. This is for targets that + // might support an ordered version of a condition, but not the unordered + // version (or vice versa). + ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + Tmp1.getValueType().isInteger()); + if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { + // Use the new condition code and swap true and false + Legalized = true; + Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC); + } else { + // If The inverse is not legal, then try to swap the arguments using + // the inverse condition code. + ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC); + if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) { + // The swapped inverse condition is legal, so swap true and false, + // lhs and rhs. + Legalized = true; + Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC); + } + } + + if (!Legalized) { + Legalized = LegalizeSetCCCondCode( + getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, + dl); - assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!"); - Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); - CC = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, - Tmp3, Tmp4, CC); + assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); + + // If we expanded the SETCC by inverting the condition code, then swap + // the True/False operands to match. + if (NeedInvert) + std::swap(Tmp3, Tmp4); + + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SELECT_CC node. + if (CC.getNode()) { + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), + Tmp1, Tmp2, Tmp3, Tmp4, CC); + } else { + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, + Tmp3, Tmp4, CC); + } + } Results.push_back(Tmp1); break; } @@ -3671,14 +3795,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC - LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), - Tmp2, Tmp3, Tmp4, dl); - - assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); - Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); - Tmp4 = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, - Tmp3, Node->getOperand(4)); + bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( + Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); + (void)Legalized; + assert(Legalized && "Can't legalize BR_CC with legal condition!"); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0)); + + // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC + // node. + if (Tmp4.getNode()) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, + Tmp4, Tmp2, Tmp3, Node->getOperand(4)); + } else { + Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, + Tmp3, Node->getOperand(4)); + } Results.push_back(Tmp1); break; } @@ -3698,10 +3835,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + Node->getOperand(0), DAG.getConstant(Idx, + TLI.getVectorIdxTy())); SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Node->getOperand(1), DAG.getConstant(Idx, + TLI.getVectorIdxTy())); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } @@ -3738,7 +3877,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { OVT = Node->getOperand(0).getSimpleValueType(); } MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { case ISD::CTTZ: @@ -3753,11 +3892,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); if (Node->getOpcode() == ISD::CTTZ) { // FIXME: This should set a bit in the zero extended value instead. - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, - DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + Tmp1 = DAG.getSelect(dl, NVT, Tmp2, + DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); } else if (Node->getOpcode() == ISD::CTLZ || Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) @@ -3852,7 +3991,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); // Perform the larger operation, then round down. - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3); + Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3); if (TruncOp != ISD::FP_ROUND) Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index de217d8..ecf4c5d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -88,6 +88,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; @@ -118,7 +119,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N, SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { // Convert the inputs to integers, and build a new pair out of them. - return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), BitConvertToInteger(N->getOperand(0)), @@ -133,7 +134,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NewOp.getValueType().getVectorElementType(), NewOp, N->getOperand(1)); } @@ -147,7 +148,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { API.clearBit(Size-1); SDValue Mask = DAG.getConstant(API, NVT); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); + return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask); } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { @@ -160,7 +161,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { @@ -172,13 +173,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT LVT = LHS.getValueType(); EVT RVT = RHS.getValueType(); @@ -226,7 +227,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { @@ -239,7 +240,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { @@ -251,7 +252,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { @@ -263,7 +264,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { @@ -275,7 +276,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { @@ -287,7 +288,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { @@ -299,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { @@ -311,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -325,7 +326,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, N->getDebugLoc()); + NVT, Ops, 3, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { @@ -338,7 +339,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { @@ -350,7 +351,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { @@ -364,7 +365,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -372,7 +373,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -381,7 +382,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, - N->getDebugLoc()); + SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -389,7 +390,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { @@ -402,7 +403,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -416,7 +417,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { @@ -429,7 +430,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { @@ -441,7 +442,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { @@ -453,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { @@ -465,7 +478,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { @@ -478,7 +491,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -490,21 +503,22 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue NewL; if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment()); + L->getPointerInfo(), NVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment(), + L->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -516,7 +530,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment(), + L->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -526,14 +541,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), N->getOperand(0),LHS,RHS); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); } SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), N->getOperand(0), N->getOperand(1), LHS, RHS, N->getOperand(4)); } @@ -548,7 +563,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { SDValue Ptr = N->getOperand(1); // Get the pointer. EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue NewVAARG; NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), @@ -565,7 +580,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { EVT SVT = N->getOperand(0).getValueType(); EVT RVT = N->getValueType(0); EVT NVT = EVT(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly @@ -585,7 +600,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, false, dl); + &Op, 1, false, dl).first; } @@ -633,7 +648,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { } SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); } @@ -645,7 +660,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -655,7 +670,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -676,7 +691,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { @@ -684,14 +699,14 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -701,7 +716,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -724,7 +739,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, use it. if (NewRHS.getNode() == 0) { @@ -744,7 +759,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "Can only soften the stored value!"); StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Val = ST->getValue(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (ST->isTruncatingStore()) // Do an FP_ROUND followed by a non-truncating store. @@ -754,9 +769,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { Val = GetSoftenedFloat(Val); return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), - ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + ST->getMemOperand()); } @@ -817,6 +830,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; @@ -850,14 +864,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0) == MVT::ppcf128 && "Logic only correct for ppcf128!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Tmp; GetExpandedFloat(N->getOperand(0), Lo, Tmp); Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); // Lo = Hi==fabs(Hi) ? Lo : -Lo; - Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo, + Lo = DAG.getSelectCC(dl, Tmp, Hi, Lo, DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), - DAG.getCondCode(ISD::SETEQ)); + ISD::SETEQ); } void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, @@ -912,7 +926,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -986,7 +1000,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), N->getValueType(0), Ops, 3, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1000,7 +1014,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1018,7 +1032,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedFloat(N->getOperand(0), Lo, Hi); Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo); Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi); @@ -1027,7 +1041,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), NVT, N->getOperand(0)); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(NVT.getSizeInBits(), 0)), NVT); } @@ -1072,6 +1086,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), @@ -1102,7 +1128,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1127,15 +1153,14 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, LoadSDNode *LD = cast<LoadSDNode>(N); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, - LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->getMemoryVT(), LD->getMemOperand()); // Remember the chain. Chain = Hi.getValue(1); @@ -1157,7 +1182,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue Src = N->getOperand(0); EVT SrcVT = Src.getValueType(); bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // First do an SINT_TO_FP, whether the original was signed or unsigned. // When promoting partial word types to i32 we must honor the signedness, @@ -1181,7 +1206,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl); + Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1216,8 +1241,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), MVT::ppcf128)); - Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), - Lo, Hi, DAG.getCondCode(ISD::SETLT)); + Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, SrcVT), + Lo, Hi, ISD::SETLT); GetPairElements(Lo, Lo, Hi); } @@ -1251,6 +1276,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break; case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; @@ -1280,7 +1306,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - DebugLoc dl) { + SDLoc dl) { SDValue LHSLo, LHSHi, RHSLo, RHSHi; GetExpandedFloat(NewLHS, LHSLo, LHSHi); GetExpandedFloat(NewRHS, RHSLo, RHSHi); @@ -1293,14 +1319,14 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, // FCMPU crN, lo1, lo2 // The following can be improved, but not that much. SDValue Tmp1, Tmp2, Tmp3; - Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETOEQ); - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, CCCode); Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); - Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETUNE); - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, CCCode); Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); @@ -1310,7 +1336,7 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1325,19 +1351,30 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { N->getOperand(4)), 0); } +SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(1), Lo, Hi); + // The ppcf128 value is providing only the sign; take it from the + // higher-order double (which must have the larger magnitude). + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), + N->getValueType(0), N->getOperand(0), Hi); +} + SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { assert(N->getOperand(0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); SDValue Lo, Hi; GetExpandedFloat(N->getOperand(0), Lo, Hi); // Round it the rest of the way (e.g. to f32) if needed. - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), N->getValueType(0), Hi, N->getOperand(1)); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { EVT RVT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). FIXME: Do this in a less hacky way. @@ -1353,12 +1390,12 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl); + return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { EVT RVT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). FIXME: Do this in a less hacky way. @@ -1370,29 +1407,29 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. - return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp, - DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, - DAG.getNode(ISD::FSUB, dl, - MVT::ppcf128, - N->getOperand(0), - Tmp)), - DAG.getConstant(0x80000000, MVT::i32)), - DAG.getNode(ISD::FP_TO_SINT, dl, - MVT::i32, N->getOperand(0)), - DAG.getCondCode(ISD::SETGE)); + return DAG.getSelectCC(dl, N->getOperand(0), Tmp, + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, + DAG.getNode(ISD::FSUB, dl, + MVT::ppcf128, + N->getOperand(0), + Tmp)), + DAG.getConstant(0x80000000, MVT::i32)), + DAG.getNode(ISD::FP_TO_SINT, dl, + MVT::i32, N->getOperand(0)), + ISD::SETGE); } RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, - false, dl); + false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1410,7 +1447,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. if (NewRHS.getNode() == 0) { @@ -1444,8 +1481,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue Lo, Hi; GetExpandedOp(ST->getValue(), Lo, Hi); - return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, - ST->getPointerInfo(), - ST->getMemoryVT(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr, + ST->getMemoryVT(), ST->getMemOperand()); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index cd2f060..4255948 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -153,20 +153,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N, SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { // Sign-extend the new bits, and continue the assertion. SDValue Op = SExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::AssertSext, N->getDebugLoc(), + return DAG.getNode(ISD::AssertSext, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { // Zero the new bits, and continue the assertion. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::AssertZext, N->getDebugLoc(), + return DAG.getNode(ISD::AssertZext, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), ResVT, N->getChain(), N->getBasePtr(), N->getMemOperand(), N->getOrdering(), @@ -179,7 +179,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); - SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), N->getOrdering(), @@ -193,7 +193,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); - SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getOrdering(), N->getSynchScope()); @@ -209,7 +209,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: @@ -264,7 +264,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), @@ -274,7 +274,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), JoinIntegers(N->getOperand(0), N->getOperand(1))); @@ -283,7 +283,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { EVT VT = N->getValueType(0); // FIXME there is no actual debug info here - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Zero extend things like i1, sign extend everything else. It shouldn't // matter in theory which one we pick, but this tends to give better code? unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; @@ -301,7 +301,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && "can only promote integers"); EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0), + return DAG.getConvertRndSat(OutVT, SDLoc(N), N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), N->getOperand(4), CvtCode); } @@ -309,7 +309,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); @@ -322,14 +322,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op); + return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op); } SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->getOpcode() == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off @@ -342,7 +342,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), N->getOperand(1)); @@ -351,7 +351,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NewOpc = N->getOpcode(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT @@ -374,7 +374,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); @@ -384,7 +384,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (getTypeAction(N->getOperand(0).getValueType()) == TargetLowering::TypePromoteInteger) { @@ -415,11 +415,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), - N->getPointerInfo(), - N->getMemoryVT(), N->isVolatile(), - N->isNonTemporal(), N->getAlignment()); + N->getMemoryVT(), N->getMemOperand()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -433,7 +431,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); EVT ValueVTs[] = { N->getValueType(0), NVT }; SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; - SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs, 2), Ops, 2); // Modified the sum result - switch anything that used the old sum to use @@ -453,7 +451,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { SDValue RHS = SExtPromotedInteger(N->getOperand(1)); EVT OVT = N->getOperand(0).getValueType(); EVT NVT = LHS.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Do the arithmetic in the larger type. unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB; @@ -476,15 +474,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { // Sign extend the input. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), N->getOperand(0),LHS,RHS); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { @@ -492,23 +490,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy)); + Mask = PromoteTargetBoolean(Mask, getSetCCResultType(OpTy)); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); - return DAG.getNode(ISD::VSELECT, N->getDebugLoc(), + return DAG.getNode(ISD::VSELECT, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(2)); SDValue RHS = GetPromotedInteger(N->getOperand(3)); - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), N->getOperand(0), N->getOperand(1), LHS, RHS, N->getOperand(4)); } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + EVT SVT = getSetCCResultType(N->getOperand(0).getValueType()); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -517,13 +515,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { if (!TLI.isTypeLegal(SVT)) SVT = NVT; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && "Vector compare must return a vector result!"); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (LHS.getValueType() != RHS.getValueType()) { + if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger && + !LHS.getValueType().isVector()) + LHS = GetPromotedInteger(LHS); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger && + !RHS.getValueType().isVector()) + RHS = GetPromotedInteger(RHS); + } + // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), - N->getOperand(1), N->getOperand(2)); + SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS, + N->getOperand(2)); assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); // Convert to the expected type. @@ -534,12 +543,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { SDValue Res = GetPromotedInteger(N->getOperand(0)); SDValue Amt = N->getOperand(1); Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt); + return DAG.getNode(ISD::SHL, SDLoc(N), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } @@ -549,7 +558,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { // that too is okay if they are integer operations. SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = GetPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } @@ -558,7 +567,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue Res = SExtPromotedInteger(N->getOperand(0)); SDValue Amt = N->getOperand(1); Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt); + return DAG.getNode(ISD::SRA, SDLoc(N), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { @@ -566,14 +575,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { SDValue Res = ZExtPromotedInteger(N->getOperand(0)); SDValue Amt = N->getOperand(1); Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt); + return DAG.getNode(ISD::SRL, SDLoc(N), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; SDValue InOp = N->getOperand(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (getTypeAction(InOp.getValueType())) { default: llvm_unreachable("Unknown type action!"); @@ -618,7 +627,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); EVT OVT = N->getOperand(0).getValueType(); EVT NVT = LHS.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Do the arithmetic in the larger type. unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; @@ -642,7 +651,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { return PromoteIntRes_Overflow(N); SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT SmallVT = LHS.getValueType(); // To determine if the result overflowed in a larger type, we extend the @@ -690,7 +699,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { // Zero extend the input. SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } @@ -703,7 +712,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Chain = N->getOperand(0); // Get the chain. SDValue Ptr = N->getOperand(1); // Get the pointer. EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); @@ -847,12 +856,12 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op); } SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); - return DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), + return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), N->getOrdering(), N->getSynchScope()); } @@ -881,7 +890,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "only know how to promote condition"); // Promote all the way up to the canonical SetCC type. - EVT SVT = TLI.getSetCCResultType(MVT::Other); + EVT SVT = getSetCCResultType(MVT::Other); SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); // The chain (Op#0) and basic block destination (Op#2) are always legal types. @@ -895,7 +904,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); SDValue Hi = GetPromotedInteger(N->getOperand(1)); assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy())); @@ -908,7 +917,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // type does not have a strange size (eg: it is not i1). EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && + "Legal vector of one illegal element?"); // Promote the inserted value. The type does not need to match the // vector element type. Check that any extra bits introduced will be @@ -931,7 +941,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) && "can only promote integer arguments"); SDValue InOp = GetPromotedInteger(N->getOperand(0)); - return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp, + return DAG.getConvertRndSat(N->getValueType(0), SDLoc(N), InOp, N->getOperand(1), N->getOperand(2), N->getOperand(3), N->getOperand(4), CvtCode); } @@ -955,7 +965,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, assert(OpNo == 2 && "Different operand and result vector types?"); // Promote the index. - SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); + SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N), + TLI.getVectorIdxTy()); return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Idx), 0); } @@ -973,7 +984,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - EVT SVT = TLI.getSetCCResultType(N->getOpcode() == ISD::SELECT ? + EVT SVT = getSetCCResultType(N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy); Cond = PromoteTargetBoolean(Cond, SVT); @@ -1011,7 +1022,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, DAG.getValueType(N->getOperand(0).getValueType())); @@ -1025,22 +1036,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); - unsigned Alignment = N->getAlignment(); - bool isVolatile = N->isVolatile(); - bool isNonTemporal = N->isNonTemporal(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. // Truncate the value and store the result. - return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(), - N->getMemoryVT(), - isVolatile, isNonTemporal, Alignment); + return DAG.getTruncStore(Ch, dl, Val, Ptr, + N->getMemoryVT(), N->getMemOperand()); } SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { @@ -1049,7 +1056,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = GetPromotedInteger(N->getOperand(0)); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); return DAG.getZeroExtendInReg(Op, dl, @@ -1127,7 +1134,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_LOAD_MAX: case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_LOAD_UMAX: - case ISD::ATOMIC_SWAP: { + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_CMP_SWAP: { std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N); SplitInteger(Tmp.first, Lo, Hi); ReplaceValueWith(SDValue(N, 1), Tmp.second); @@ -1180,6 +1188,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; } break; case ISD::ATOMIC_CMP_SWAP: @@ -1189,6 +1198,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; } break; case ISD::ATOMIC_LOAD_ADD: @@ -1198,6 +1208,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; } break; case ISD::ATOMIC_LOAD_SUB: @@ -1207,6 +1218,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; } break; case ISD::ATOMIC_LOAD_AND: @@ -1216,6 +1228,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; } break; case ISD::ATOMIC_LOAD_OR: @@ -1225,6 +1238,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; } break; case ISD::ATOMIC_LOAD_XOR: @@ -1234,6 +1248,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; } break; case ISD::ATOMIC_LOAD_NAND: @@ -1243,6 +1258,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; } break; } @@ -1254,7 +1270,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { /// and the shift amount is a constant 'Amt'. Expand the operation. void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); @@ -1352,7 +1368,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned NVTBits = NVT.getScalarType().getSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; @@ -1439,7 +1455,7 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned NVTBits = NVT.getSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Get the incoming operand to be shifted. SDValue InL, InH; @@ -1448,7 +1464,7 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); - SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy), Amt, NVBitsNode, ISD::SETULT); SDValue LoS, HiS, LoL, HiL; @@ -1467,8 +1483,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { LoL = DAG.getConstant(0, NVT); // Lo part is zero. HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRL: // Short: ShAmt < NVTBits @@ -1483,8 +1499,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { HiL = DAG.getConstant(0, NVT); // Hi part is zero. LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRA: // Short: ShAmt < NVTBits @@ -1500,15 +1516,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getConstant(NVTBits-1, ShTy)); LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; } } void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; GetExpandedInteger(N->getOperand(0), LHSL, LHSH); @@ -1545,25 +1561,25 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); - SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); - SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); - SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], ISD::SETULT); - SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, - DAG.getConstant(1, NVT), Carry1); + SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); SDValue Cmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1572,7 +1588,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, SDValue &Lo, SDValue &Hi) { // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); @@ -1598,7 +1614,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, SDValue &Lo, SDValue &Hi) { // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); @@ -1623,7 +1639,7 @@ void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo, void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { // The low part is any extension of the input (which degenerates to a copy). @@ -1645,7 +1661,7 @@ void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); @@ -1666,7 +1682,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); @@ -1686,7 +1702,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo); Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); @@ -1703,26 +1719,26 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); - SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, + SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi, DAG.getConstant(0, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); - Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, - DAG.getNode(ISD::ADD, dl, NVT, LoLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); + Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); Hi = DAG.getConstant(0, NVT); } void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); @@ -1733,42 +1749,44 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); - SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, + SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, DAG.getConstant(0, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); - Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, - DAG.getNode(ISD::ADD, dl, NVT, HiLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); + Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); Hi = DAG.getConstant(0, NVT); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, + dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, + dl).first, Lo, Hi); } @@ -1790,7 +1808,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); bool isInvariant = N->isInvariant(); - DebugLoc dl = N->getDebugLoc(); + const MDNode *TBAAInfo = N->getTBAAInfo(); + SDLoc dl(N); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1798,7 +1817,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), - MemVT, isVolatile, isNonTemporal, Alignment); + MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -1820,7 +1839,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1829,11 +1849,11 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1851,17 +1871,17 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1889,7 +1909,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue LL, LH, RL, RH; GetExpandedInteger(N->getOperand(0), LL, LH); GetExpandedInteger(N->getOperand(1), RL, RH); @@ -1901,7 +1921,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); @@ -1984,7 +2004,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, + dl).first, Lo, Hi); } @@ -1992,7 +2013,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); // Expand the result by simply replacing it with the equivalent // non-overflow-checking operation. @@ -2033,7 +2054,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2047,13 +2068,13 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If we can emit an efficient shift operation, do so now. Check to see if // the RHS is a constant. @@ -2142,7 +2163,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo, + Hi); return; } @@ -2153,7 +2175,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { // The low part is sign extension of the input (degenerates to a copy). @@ -2183,7 +2205,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, void DAGTypeLegalizer:: ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); @@ -2211,7 +2233,7 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2225,13 +2247,13 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(), N->getOperand(0), @@ -2243,7 +2265,7 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand the result by simply replacing it with the equivalent // non-overflow-checking operation. @@ -2265,7 +2287,7 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // A divide for UMULO should be faster than a function call. if (N->getOpcode() == ISD::UMULO) { @@ -2276,16 +2298,16 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // A divide for UMULO will be faster than a function call. Select to // make sure we aren't using 0. - SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), RHS, DAG.getConstant(0, VT), ISD::SETEQ); - SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, - DAG.getConstant(1, VT), RHS); + SDValue NotZero = DAG.getSelect(dl, VT, isZero, + DAG.getConstant(1, VT), RHS); SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, ISD::SETNE); - Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero, - DAG.getConstant(0, N->getValueType(1)), - Overflow); + Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, + DAG.getConstant(0, N->getValueType(1)), + Overflow); ReplaceValueWith(SDValue(N, 1), Overflow); return; } @@ -2293,7 +2315,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); EVT PtrVT = TLI.getPointerTy(); Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); - + // Replace this with a libcall that will check overflow. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i32) @@ -2351,7 +2373,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2365,13 +2387,13 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2385,13 +2407,13 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { // The low part is zero extension of the input (degenerates to a copy). @@ -2418,7 +2440,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); SDValue Zero = DAG.getConstant(0, VT); SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, @@ -2498,7 +2520,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - DebugLoc dl) { + SDLoc dl) { SDValue LHSLo, LHSHi, RHSLo, RHSHi; GetExpandedInteger(NewLHS, LHSLo, LHSHi); GetExpandedInteger(NewRHS, RHSLo, RHSHi); @@ -2555,16 +2577,16 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3) TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL); SDValue Tmp1, Tmp2; - Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()), + Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); if (!Tmp1.getNode()) - Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC); - Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); if (!Tmp2.getNode()) Tmp2 = DAG.getNode(ISD::SETCC, dl, - TLI.getSetCCResultType(LHSHi.getValueType()), + getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, DAG.getCondCode(CCCode)); ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode()); @@ -2584,21 +2606,21 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } - NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()), + NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ, false, DagCombineInfo, dl); if (!NewLHS.getNode()) - NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ); - NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(), - NewLHS, Tmp1, Tmp2); + NewLHS = DAG.getSelect(dl, Tmp1.getValueType(), + NewLHS, Tmp1, Tmp2); NewRHS = SDValue(); } SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); - IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -2616,7 +2638,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); - IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -2634,7 +2656,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); - IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. if (NewRHS.getNode() == 0) { @@ -2672,7 +2694,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2689,7 +2711,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); - DebugLoc dl = N->getDebugLoc(); + const MDNode *TBAAInfo = N->getTBAAInfo(); + SDLoc dl(N); SDValue Lo, Hi; assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -2698,7 +2721,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), N->getMemoryVT(), isVolatile, isNonTemporal, - Alignment); + Alignment, TBAAInfo); } if (TLI.isLittleEndian()) { @@ -2706,7 +2729,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2715,11 +2738,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2747,17 +2770,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), - HiVT, isVolatile, isNonTemporal, Alignment); + HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2765,14 +2788,14 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); // Just truncate the low part of the source. - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL); } SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); EVT SrcVT = Op.getValueType(); EVT DstVT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // The following optimization is valid only if every value in SrcVT (when // treated as signed) is representable in DstVT. Check that the mantissa @@ -2806,7 +2829,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Lo, Hi; GetExpandedInteger(Op, Lo, Hi); SDValue SignSet = DAG.getSetCC(dl, - TLI.getSetCCResultType(Hi.getValueType()), + getSetCCResultType(Hi.getValueType()), Hi, DAG.getConstant(0, Hi.getValueType()), ISD::SETLT); @@ -2819,10 +2842,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Zero = DAG.getIntPtrConstant(0); SDValue Four = DAG.getIntPtrConstant(4); if (TLI.isBigEndian()) std::swap(Zero, Four); - SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, - Zero, Four); + SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, + Zero, Four); unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); - FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); + FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(), + FudgePtr, Offset); Alignment = std::min(Alignment, 4u); // Load the value out, extending it from f32 to the destination float type. @@ -2839,11 +2863,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, cast<AtomicSDNode>(N)->getMemoryVT(), N->getOperand(0), @@ -2865,7 +2889,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned OutNumElems = OutVT.getVectorNumElements(); EVT NOutVTElem = NOutVT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue BaseIdx = N->getOperand(1); SmallVector<SDValue, 8> Ops; @@ -2874,7 +2898,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { // Extract the element from the original vector. SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), - BaseIdx, DAG.getIntPtrConstant(i)); + BaseIdx, DAG.getConstant(i, BaseIdx.getValueType())); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getVectorElementType(), N->getOperand(0), Index); @@ -2890,7 +2914,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); SmallVector<int, 8> NewMask; @@ -2913,12 +2937,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { unsigned NumElems = N->getNumOperands(); EVT NOutVTElem = NOutVT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SmallVector<SDValue, 8> Ops; Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { - SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + SDValue Op; + // BUILD_VECTOR integer operand types are allowed to be larger than the + // result's element type. This may still be true after the promotion. For + // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to + // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>. + if (N->getOperand(i).getValueType().bitsLT(NOutVTElem)) + Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + else + Op = N->getOperand(i); Ops.push_back(Op); } @@ -2927,7 +2959,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); assert(!N->getOperand(0).getValueType().isVector() && "Input must be a scalar"); @@ -2943,7 +2975,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); @@ -2964,7 +2996,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDValue Op = N->getOperand(i); for (unsigned j = 0; j < NumElem; ++j) { SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - InElemTy, Op, DAG.getIntPtrConstant(j)); + InElemTy, Op, DAG.getConstant(j, + TLI.getVectorIdxTy())); Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); } } @@ -2979,7 +3012,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { EVT NOutVTElem = NOutVT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl, @@ -2989,9 +3022,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); - SDValue V1 = N->getOperand(1); + SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl, TLI.getVectorIdxTy()); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, V0->getValueType(0).getScalarType(), V0, V1); @@ -3002,7 +3035,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumElems = N->getNumOperands(); EVT RetSclrTy = N->getValueType(0).getVectorElementType(); @@ -3019,7 +3052,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; i<NumElem; ++i) { // Extract element from incoming vector SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, - Incoming, DAG.getIntPtrConstant(i)); + Incoming, DAG.getConstant(i, TLI.getVectorIdxTy())); SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); NewOps.push_back(Tr); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a7d5fb0..eb13230 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -738,9 +738,6 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { SDValue &OpEntry = PromotedIntegers[Op]; assert(OpEntry.getNode() == 0 && "Node is already promoted!"); OpEntry = Result; - - // Propagate node ordering - DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { @@ -752,9 +749,6 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { SDValue &OpEntry = SoftenedFloats[Op]; assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); OpEntry = Result; - - // Propagate node ordering - DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { @@ -769,9 +763,6 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = ScalarizedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); OpEntry = Result; - - // Propagate node ordering - DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, @@ -799,10 +790,6 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; - - // Propagate ordering - DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); - DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, @@ -830,10 +817,6 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; - - // Propagate ordering - DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); - DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, @@ -863,10 +846,6 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already split"); Entry.first = Lo; Entry.second = Hi; - - // Propagate ordering - DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); - DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { @@ -878,9 +857,6 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = WidenedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node already widened!"); OpEntry = Result; - - // Propagate node ordering - DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } @@ -891,7 +867,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { /// BitConvertToInteger - Convert to an integer of the same size. SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { unsigned BitWidth = Op.getValueType().getSizeInBits(); - return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(Op), EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } @@ -902,13 +878,13 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = Op.getValueType().getVectorNumElements(); - return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(Op), EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); } SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, EVT DestVT) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Create the stack frame object. Make sure it is aligned for both // the source and destination types. SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); @@ -948,8 +924,6 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { "Custom lowering returned the wrong number of results!"); for (unsigned i = 0, e = Results.size(); i != e; ++i) { ReplaceValueWith(SDValue(N, i), Results[i]); - // Propagate node ordering - DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N)); } return true; } @@ -984,25 +958,11 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) { return SDValue(N->getOperand(ResNo)); } -/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type -/// which is split into two not necessarily identical pieces. -void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { - // Currently all types are split in half. - if (!InVT.isVector()) { - LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - } else { - unsigned NumElements = InVT.getVectorNumElements(); - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), NumElements/2); - } -} - /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = Pair.getDebugLoc(); + SDLoc dl(Pair); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, DAG.getIntPtrConstant(0)); @@ -1012,12 +972,9 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index) { - DebugLoc dl = Index.getDebugLoc(); + SDLoc dl(Index); // Make sure the index type is big enough to compute in. - if (Index.getValueType().bitsGT(TLI.getPointerTy())) - Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index); - else - Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); + Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy()); // Calculate the element offset and add it to the pointer. unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. @@ -1029,9 +986,9 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, /// JoinIntegers - Build an integer with low bits Lo and high bits Hi. SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { - // Arbitrarily use dlHi for result DebugLoc - DebugLoc dlHi = Hi.getDebugLoc(); - DebugLoc dlLo = Lo.getDebugLoc(); + // Arbitrarily use dlHi for result SDLoc + SDLoc dlHi(Hi); + SDLoc dlLo(Lo); EVT LVT = Lo.getValueType(); EVT HVT = Hi.getValueType(); EVT NVT = EVT::getIntegerVT(*DAG.getContext(), @@ -1048,22 +1005,25 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned) { unsigned NumOps = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, + dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, + dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, + dl).first; } SmallVector<SDValue, 8> Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); return TLI.makeLibCall(DAG, LC, N->getValueType(0), - &Ops[0], NumOps, isSigned, dl); + &Ops[0], NumOps, isSigned, dl).first; } // ExpandChainLibCall - Expand a node into a call to a libcall. Similar to @@ -1093,7 +1053,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Node->getDebugLoc()); + Callee, Args, DAG, SDLoc(Node)); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo; @@ -1103,7 +1063,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { - DebugLoc dl = Bool.getDebugLoc(); + SDLoc dl(Bool); ISD::NodeType ExtendCode = TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector())); return DAG.getNode(ExtendCode, dl, VT, Bool); @@ -1114,7 +1074,7 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { void DAGTypeLegalizer::SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1c4274a..13bb08f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1,4 +1,4 @@ -//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===// +//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -73,6 +73,10 @@ private: return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(*DAG.getContext(), VT); + } + /// IgnoreNodeResults - Pretend all of this node's results are legal. bool IgnoreNodeResults(SDNode *N) const { return N->getOpcode() == ISD::TargetConstant; @@ -195,7 +199,7 @@ private: /// final size. SDValue SExtPromotedInteger(SDValue Op) { EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); Op = GetPromotedInteger(Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, DAG.getValueType(OldVT)); @@ -205,7 +209,7 @@ private: /// final size. SDValue ZExtPromotedInteger(SDValue Op) { EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); Op = GetPromotedInteger(Op); return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); } @@ -357,7 +361,7 @@ private: SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl); + ISD::CondCode &CCCode, SDLoc dl); //===--------------------------------------------------------------------===// // Float to Integer Conversion Support: LegalizeFloatTypes.cpp @@ -406,6 +410,7 @@ private: SDValue SoftenFloatRes_FPOWI(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FROUND(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); @@ -466,6 +471,7 @@ private: void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -476,6 +482,7 @@ private: // Float Operand Expansion. bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N); SDValue ExpandFloatOp_FP_ROUND(SDNode *N); SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); @@ -484,7 +491,7 @@ private: SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl); + ISD::CondCode &CCCode, SDLoc dl); //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp @@ -530,7 +537,7 @@ private: // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); - SDValue ScalarizeVecOp_EXTEND(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -554,6 +561,7 @@ private: void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -624,6 +632,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); + SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); @@ -649,7 +658,7 @@ private: /// loads to load a vector with a resulting wider type. It takes /// LdChain: list of chains for the load to be generated. /// Ld: load to widen - SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, + SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD); /// GenWidenVectorExtLoads - Helper function to generate a set of extension @@ -657,20 +666,20 @@ private: /// LdChain: list of chains for the load to be generated. /// Ld: load to widen /// ExtType: extension element type - SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, + SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of /// stores to store a widen vector into non widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST); + void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Helper genWidenVectorTruncStores - Helper function to generate a set of /// stores to store a truncate widen vector into non widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, + void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Modifies a vector input (widen or narrows) to a vector of NVT. The @@ -695,10 +704,6 @@ private: GetExpandedFloat(Op, Lo, Hi); } - /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type - /// which is split (or expanded) into two not necessarily identical pieces. - void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT); - /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi); @@ -726,6 +731,12 @@ private: GetExpandedFloat(Op, Lo, Hi); } + + /// This function will split the integer \p Op into \p NumElements + /// operations of type \p EltVT and store them in \p Ops. + void IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl<SDValue> &Ops, EVT EltVT); + // Generic Result Expansion. void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 222d1c0..c749fde 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -41,7 +41,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Handle some special cases efficiently. switch (getTypeAction(InVT)) { @@ -77,12 +77,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeWidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - InVT.getVectorNumElements()/2); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); + llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); @@ -115,7 +112,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, - CastInOp, DAG.getIntPtrConstant(i))); + CastInOp, DAG.getConstant(i, + TLI.getVectorIdxTy()))); // Build Lo, Hi pair by pairing extracted elements if needed. unsigned Slot = 0; @@ -161,13 +159,14 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, + StackPtr.getValueType())); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, @@ -203,7 +202,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue OldVec = N->getOperand(0); unsigned OldElts = OldVec.getValueType().getVectorNumElements(); EVT OldEltVT = OldVec.getValueType().getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Convert to a vector of the expanded element type, for example // <3 x i64> -> <6 x i32>. @@ -227,10 +226,6 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. SDValue Idx = N->getOperand(1); - // Make sure the type of Idx is big enough to hold the new values. - if (Idx.getValueType().bitsLT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); - Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); @@ -245,7 +240,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(ISD::isNormalLoad(N) && "This routine only for normal loads!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); LoadSDNode *LD = cast<LoadSDNode>(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); @@ -255,20 +250,22 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -289,7 +286,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Chain = N->getOperand(0); SDValue Ptr = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); const unsigned Align = N->getConstantOperandVal(3); Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); @@ -309,29 +306,54 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { // Generic Operand Expansion. //===--------------------------------------------------------------------===// +void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl<SDValue> &Ops, + EVT EltVT) { + assert(Op.getValueType().isInteger()); + SDLoc DL(Op); + SDValue Parts[2]; + + if (NumElements > 1) { + NumElements >>= 1; + SplitInteger(Op, Parts[0], Parts[1]); + if (TLI.isBigEndian()) + std::swap(Parts[0], Parts[1]); + IntegerToVector(Parts[0], NumElements, Ops, EltVT); + IntegerToVector(Parts[1], NumElements, Ops, EltVT); + } else { + Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op)); + } +} + SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->getValueType(0).isVector()) { // An illegal expanding type is being converted to a legal vector type. // Make a two element vector out of the expanded parts and convert that // instead, but only if the new vector type is legal (otherwise there // is no point, and it might create expansion loops). For example, on // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32. + // + // FIXME: I'm not sure why we are first trying to split the input into + // a 2 element vector, so I'm leaving it here to maintain the current + // behavior. + unsigned NumElts = 2; EVT OVT = N->getOperand(0).getValueType(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), - 2); + NumElts); + if (!isTypeLegal(NVT)) { + // If we can't find a legal type by splitting the integer in half, + // then we can use the node's value type. + NumElts = N->getValueType(0).getVectorNumElements(); + NVT = N->getValueType(0); + } - if (isTypeLegal(NVT)) { - SDValue Parts[2]; - GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]); + SmallVector<SDValue, 8> Ops; + IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - if (TLI.isBigEndian()) - std::swap(Parts[0], Parts[1]); - - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); - return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); - } + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts); + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } // Otherwise, store to a temporary and load out again as the new type. @@ -344,7 +366,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { unsigned NumElts = VecVT.getVectorNumElements(); EVT OldVT = N->getOperand(0).getValueType(); EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); assert(OldVT == VecVT.getVectorElementType() && "BUILD_VECTOR operand type doesn't match vector element type!"); @@ -382,7 +404,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { // The vector type is legal but the element type needs expansion. EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Val = N->getOperand(1); EVT OldEVT = Val.getValueType(); @@ -406,7 +428,8 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); Idx = DAG.getNode(ISD::ADD, dl, - Idx.getValueType(), Idx, DAG.getIntPtrConstant(1)); + Idx.getValueType(), Idx, + DAG.getConstant(1, Idx.getValueType())); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type. @@ -414,7 +437,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); @@ -430,7 +453,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { assert(ISD::isNormalStore(N) && "This routine only for normal stores!"); assert(OpNo == 1 && "Can only expand the stored value so far"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); StoreSDNode *St = cast<StoreSDNode>(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), @@ -440,6 +463,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); bool isNonTemporal = St->isNonTemporal(); + const MDNode *TBAAInfo = St->getTBAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -451,15 +475,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); - assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -483,21 +506,19 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH, CL, CH; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitOp(N->getOperand(1), LL, LH); GetSplitOp(N->getOperand(2), RL, RH); SDValue Cond = N->getOperand(0); CL = CH = Cond; if (Cond.getValueType().isVector()) { - assert(Cond.getValueType().getVectorElementType() == MVT::i1 && - "Condition legalized before result?"); - unsigned NumElements = Cond.getValueType().getVectorNumElements(); - EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2); - CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getIntPtrConstant(0)); - CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getIntPtrConstant(NumElements / 2)); + // Check if there are already splitted versions of the vector available and + // use those instead of splitting the mask operand again. + if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Cond, CL, CH); + else + llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl); } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); @@ -507,7 +528,7 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitOp(N->getOperand(2), LL, LH); GetSplitOp(N->getOperand(3), RL, RH); @@ -519,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index c6e066e..2c3cdcc 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -171,7 +171,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: Changed = true; - return LegalizeOp(TLI.LowerOperation(Result, DAG)); + return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG)); case TargetLowering::Expand: Changed = true; return LegalizeOp(ExpandStore(Op)); @@ -227,6 +227,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_TO_UINT: case ISD::FNEG: case ISD::FABS: + case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: @@ -241,6 +242,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::FROUND: case ISD::FFLOOR: case ISD::FP_ROUND: case ISD::FP_EXTEND: @@ -320,7 +322,7 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); for (unsigned j = 0; j != Op.getNumOperands(); ++j) { @@ -357,7 +359,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { // Build a new vector type and check if it is legal. MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : @@ -375,7 +377,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { SDValue VectorLegalizer::ExpandLoad(SDValue Op) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); SDValue Chain = LD->getChain(); SDValue BasePTR = LD->getBasePtr(); @@ -416,7 +418,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + LD->isInvariant(), LD->getAlignment(), + LD->getTBAAInfo()); } else { EVT LoadVT = WideVT; while (RemainingBytes < LoadBytes) { @@ -426,13 +429,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), LD->getAlignment(), + LD->getTBAAInfo()); } RemainingBytes -= LoadBytes; Offset += LoadBytes; BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(LoadBytes)); + DAG.getConstant(LoadBytes, BasePTR.getValueType())); LoadVals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -497,10 +501,10 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->getAlignment(), LD->getTBAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + DAG.getConstant(Stride, BasePTR.getValueType())); Vals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -519,7 +523,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { } SDValue VectorLegalizer::ExpandStore(SDValue Op) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); SDValue Chain = ST->getChain(); SDValue BasePTR = ST->getBasePtr(); @@ -529,6 +533,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); unsigned NumElem = StVT.getVectorNumElements(); // The type of the data we want to save @@ -551,15 +556,15 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Value, DAG.getIntPtrConstant(Idx)); + RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy())); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + DAG.getConstant(Stride, BasePTR.getValueType())); Stores.push_back(Store); } @@ -572,9 +577,9 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it - // using XOR AND OR. The selector bit is broadcasted. + // using XOR AND OR. The selector bit is broadcasted. EVT VT = Op.getValueType(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue Mask = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -597,15 +602,12 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); // Generate a mask operand. - EVT MaskTy = TLI.getSetCCResultType(VT); - assert(MaskTy.isVector() && "Invalid CC type"); - assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits() - && "Invalid mask size"); + EVT MaskTy = VT.changeVectorElementTypeToInteger(); // What is the size of each element in the vector mask. EVT BitTy = MaskTy.getScalarType(); - Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask, + Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), DAG.getConstant(0, BitTy)); @@ -637,7 +639,7 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) return DAG.UnrollVectorOp(Op.getNode()); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); unsigned BW = VT.getScalarType().getSizeInBits(); @@ -652,13 +654,14 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. - EVT VT = Op.getOperand(0).getValueType(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue Mask = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); + EVT VT = Mask.getValueType(); + // If we can't even use the basic vector operations of // AND,OR,XOR, we will have to scalarize the op. // Notice that the operation may be 'promoted' which means that it is @@ -673,8 +676,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); - assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits() - && "Invalid mask size"); + // If the mask and the type are different sizes, unroll the vector op. This + // can occur when getSetCCResultType returns something that is different in + // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. + if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits()) + return DAG.UnrollVectorOp(Op.getNode()); + // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because // the mask is a vector of integers. @@ -693,7 +700,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { EVT VT = Op.getOperand(0).getValueType(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); // Make sure that the SINT_TO_FP and SRL instructions are available. if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || @@ -734,7 +741,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), Zero, Op.getOperand(0)); } return DAG.UnrollVectorOp(Op.getNode()); @@ -746,19 +753,20 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { EVT EltVT = VT.getVectorElementType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); EVT TmpEltVT = LHS.getValueType().getVectorElementType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getIntPtrConstant(i)); - Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT), + DAG.getConstant(i, TLI.getVectorIdxTy())); + Ops[i] = DAG.getNode(ISD::SETCC, dl, + TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); - Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i], - DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), EltVT), - DAG.getConstant(0, EltVT)); + Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), EltVT), + DAG.getConstant(0, EltVT)); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 04c6bfd..f7a3e3d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -83,6 +83,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -97,6 +98,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::AND: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: @@ -128,7 +130,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } @@ -136,7 +138,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = GetScalarizedVector(N->getOperand(2)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, Op2); } @@ -148,7 +150,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), NewVT, N->getOperand(0)); } @@ -158,14 +160,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { // The BUILD_VECTOR operands may be of wider element types and // we may need to truncate them back to the requested return type. if (EltVT.isInteger()) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); return InOp; } SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op0 = GetScalarizedVector(N->getOperand(0)); - return DAG.getConvertRndSat(NewVT, N->getDebugLoc(), + return DAG.getConvertRndSat(NewVT, SDLoc(N), Op0, DAG.getValueType(NewVT), DAG.getValueType(Op0.getValueType()), N->getOperand(3), @@ -174,7 +176,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { } SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0).getVectorElementType(), N->getOperand(0), N->getOperand(1)); } @@ -182,13 +184,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), NewVT, Op, N->getOperand(1)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FPOWI, N->getDebugLoc(), + return DAG.getNode(ISD::FPOWI, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } @@ -199,7 +201,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { EVT EltVT = N->getValueType(0).getVectorElementType(); if (Op.getValueType() != EltVT) // FIXME: Can this happen for floating point types? - Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op); + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op); return Op; } @@ -209,13 +211,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getExtensionType(), N->getValueType(0).getVectorElementType(), - N->getDebugLoc(), + SDLoc(N), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getOriginalAlignment()); + N->isInvariant(), N->getOriginalAlignment(), + N->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -227,14 +230,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. EVT DestVT = N->getValueType(0).getVectorElementType(); SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); } SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { EVT EltVT = N->getValueType(0).getVectorElementType(); EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType(); SDValue LHS = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT, + return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT, LHS, DAG.getValueType(ExtVT)); } @@ -244,7 +247,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { EVT EltVT = N->getValueType(0).getVectorElementType(); SDValue InOp = N->getOperand(0); if (InOp.getValueType() != EltVT) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); return InOp; } @@ -262,33 +265,34 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { assert(VecBool == TargetLowering::UndefinedBooleanContent || VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); // Vector read from all ones, scalar expects a single 1 so mask. - Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT, + Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, Cond, DAG.getConstant(1, CondVT)); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: assert(VecBool == TargetLowering::UndefinedBooleanContent || VecBool == TargetLowering::ZeroOrOneBooleanContent); // Vector reads from a one, scalar from all ones so sign extend. - Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT, + Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT, Cond, DAG.getValueType(MVT::i1)); break; } } - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), Cond, LHS, - GetScalarizedVector(N->getOperand(2))); + + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), N->getOperand(0), LHS, - GetScalarizedVector(N->getOperand(2))); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(2)); - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(), + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), N->getOperand(0), N->getOperand(1), LHS, GetScalarizedVector(N->getOperand(3)), N->getOperand(4)); @@ -303,7 +307,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Turn it into a scalar SETCC. return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); @@ -330,7 +334,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); EVT NVT = N->getValueType(0).getVectorElementType(); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Turn it into a scalar SETCC. SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, @@ -368,7 +372,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: - Res = ScalarizeVecOp_EXTEND(N); + case ISD::TRUNCATE: + Res = ScalarizeVecOp_UnaryOp(N); break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); @@ -401,22 +406,22 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { /// to be scalarized, it must be <1 x ty>. Convert the element instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { SDValue Elt = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Elt); } /// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs /// to be scalarized, it must be <1 x ty>. Extend the element instead. -SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { assert(N->getValueType(0).getVectorNumElements() == 1 && "Unexected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); SmallVector<SDValue, 1> Ops(1); - Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + Ops[0] = DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0).getScalarType(), Elt); // Revectorize the result so the types line up with what the uses of this // expression expect. - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), &Ops[0], 1); } @@ -426,7 +431,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector<SDValue, 8> Ops(N->getNumOperands()); for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) Ops[i] = GetScalarizedVector(N->getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), &Ops[0], Ops.size()); } @@ -436,7 +441,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue Res = GetScalarizedVector(N->getOperand(0)); if (Res.getValueType() != N->getValueType(0)) - Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), + Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res); return Res; } @@ -446,7 +451,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(N->isUnindexed() && "Indexed store of one-element vector?"); assert(OpNo == 1 && "Do not know how to scalarize this operand!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->isTruncatingStore()) return DAG.getTruncStore(N->getChain(), dl, @@ -454,12 +459,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getBasePtr(), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getAlignment()); + N->getAlignment(), N->getTBAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment()); + N->getOriginalAlignment(), N->getTBAAInfo()); } @@ -516,7 +521,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; - case ISD::ANY_EXTEND: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: @@ -539,21 +543,27 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: - case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: case ISD::TRUNCATE: case ISD::UINT_TO_FP: - case ISD::ZERO_EXTEND: SplitVecRes_UnaryOp(N, Lo, Hi); break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + SplitVecRes_ExtendOp(N, Lo, Hi); + break; + case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: case ISD::SDIV: @@ -587,7 +597,7 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDValue RHSLo, RHSHi; GetSplitVector(N->getOperand(1), RHSLo, RHSHi); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); @@ -601,7 +611,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); SDValue Op2Lo, Op2Hi; GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo); @@ -614,8 +624,8 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // We know the result is a vector. The input may be either a vector or a // scalar value. EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - DebugLoc dl = N->getDebugLoc(); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + SDLoc dl(N); SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); @@ -668,8 +678,8 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + SDLoc dl(N); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); @@ -681,7 +691,7 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumSubvectors = N->getNumOperands() / 2; if (NumSubvectors == 1) { Lo = N->getOperand(0); @@ -690,7 +700,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, } EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); @@ -703,20 +713,21 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements())); + DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), + TLI.getVectorIdxTy())); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitVector(N->getOperand(0), Lo, Hi); Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); @@ -726,10 +737,11 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT LoVT, HiVT; - GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = + DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, DAG.getValueType(LoVT)); @@ -742,7 +754,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue Vec = N->getOperand(0); SDValue Elt = N->getOperand(1); SDValue Idx = N->getOperand(2); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitVector(Vec, Lo, Hi); if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) { @@ -753,7 +765,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo.getValueType(), Lo, Elt, Idx); else Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getIntPtrConstant(IdxVal - LoNumElts)); + DAG.getConstant(IdxVal - LoNumElts, + TLI.getVectorIdxTy())); return; } @@ -780,7 +793,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, StackPtr.getValueType())); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -790,8 +803,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + SDLoc dl(N); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); Hi = DAG.getUNDEF(HiVT); } @@ -800,8 +813,8 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); EVT LoVT, HiVT; - DebugLoc dl = LD->getDebugLoc(); - GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); + SDLoc dl(LD); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Ch = LD->getChain(); @@ -812,20 +825,22 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT LoMemVT, HiMemVT; - GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - isInvariant, Alignment); + isInvariant, Alignment, TBAAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment); + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -843,23 +858,13 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { "Operand types must be vectors"); EVT LoVT, HiVT; - DebugLoc DL = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + SDLoc DL(N); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the input. - EVT InVT = N->getOperand(0).getValueType(); SDValue LL, LH, RL, RH; - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); - LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - - RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getIntPtrConstant(0)); - RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -869,22 +874,16 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi) { // Get the dest types - they may not match the input types, e.g. int_to_fp. EVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + SDLoc dl(N); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. EVT InVT = N->getOperand(0).getValueType(); - if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) GetSplitVector(N->getOperand(0), Lo, Hi); - } else { - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - } + else + llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); @@ -907,11 +906,63 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, } } +void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT SrcVT = N->getOperand(0).getValueType(); + EVT DestVT = N->getValueType(0); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); + + // We can do better than a generic split operation if the extend is doing + // more than just doubling the width of the elements and the following are + // true: + // - The number of vector elements is even, + // - the source type is legal, + // - the type of a split source is illegal, + // - the type of an extended (by doubling element size) source is legal, and + // - the type of that extended source when split is legal. + // + // This won't necessarily completely legalize the operation, but it will + // more effectively move in the right direction and prevent falling down + // to scalarization in many cases due to the input vector being split too + // far. + unsigned NumElements = SrcVT.getVectorNumElements(); + if ((NumElements & 1) == 0 && + SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { + LLVMContext &Ctx = *DAG.getContext(); + EVT NewSrcVT = EVT::getVectorVT( + Ctx, EVT::getIntegerVT( + Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2), + NumElements); + EVT SplitSrcVT = + EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); + EVT SplitLoVT, SplitHiVT; + llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); + if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && + TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { + DEBUG(dbgs() << "Split vector extend via incremental extend:"; + N->dump(&DAG); dbgs() << "\n"); + // Extend the source vector by one step. + SDValue NewSrc = + DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); + // Get the low and high halves of the new, extended one step, vector. + llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + // Extend those vector halves the rest of the way. + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + return; + } + } + // Fall back to the generic unary operator splitting otherwise. + SplitVecRes_UnaryOp(N, Lo, Hi); +} + void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi) { // The low and high parts of the original input give four input vectors. SDValue Inputs[4]; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); EVT NewVT = Inputs[0].getValueType(); @@ -994,7 +1045,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Extract the vector element by hand. SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], DAG.getIntPtrConstant(Idx))); + Inputs[Input], DAG.getConstant(Idx, + TLI.getVectorIdxTy()))); } // Construct the Lo/Hi output using a BUILD_VECTOR. @@ -1030,6 +1082,10 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"); SDValue Res = SDValue(); + // See if the target wants to custom split this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + if (Res.getNode() == 0) { switch (N->getOpcode()) { default: @@ -1094,41 +1150,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { SDValue Mask = N->getOperand(0); SDValue Src0 = N->getOperand(1); SDValue Src1 = N->getOperand(2); - DebugLoc DL = N->getDebugLoc(); - EVT MaskVT = Mask.getValueType(); - assert(MaskVT.isVector() && "VSELECT without a vector mask?"); + EVT Src0VT = Src0.getValueType(); + SDLoc DL(N); + assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?"); SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); assert(Lo.getValueType() == Hi.getValueType() && - "Lo and Hi have differing types");; - - unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); - unsigned HiNumElts = Hi.getValueType().getVectorNumElements(); - assert(LoNumElts == HiNumElts && "Asymmetric vector split?"); - - LLVMContext &Ctx = *DAG.getContext(); - SDValue Zero = DAG.getIntPtrConstant(0); - SDValue LoElts = DAG.getIntPtrConstant(LoNumElts); - EVT Src0VT = Src0.getValueType(); - EVT Src0EltTy = Src0VT.getVectorElementType(); - EVT MaskEltTy = MaskVT.getVectorElementType(); - - EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts); - EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts); - EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts); - EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts); - - SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero); - SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero); + "Lo and Hi have differing types"); - SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts); - SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts); + EVT LoOpVT, HiOpVT; + llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); + assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); - SDValue LoMask = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero); - SDValue HiMask = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts); + SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; + llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); + llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); + llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); SDValue LoSelect = DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); @@ -1142,7 +1180,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // The result has a legal vector type, but the input needs splitting. EVT ResVT = N->getValueType(0); SDValue Lo, Hi; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitVector(N->getOperand(0), Lo, Hi); EVT InVT = Lo.getValueType(); @@ -1167,7 +1205,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { if (TLI.isBigEndian()) std::swap(Lo, Hi); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), JoinIntegers(Lo, Hi)); } @@ -1175,7 +1213,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { // We know that the extracted result type is legal. EVT SubVT = N->getValueType(0); SDValue Idx = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); @@ -1215,7 +1253,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Store the vector to the stack. EVT EltVT = VecVT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo(), false, false, 0); @@ -1229,7 +1267,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { assert(N->isUnindexed() && "Indexed store of vector?"); assert(OpNo == 1 && "Can only split the stored value"); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); bool isTruncating = N->isTruncatingStore(); SDValue Ch = N->getChain(); @@ -1238,39 +1276,40 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); bool isNT = N->isNonTemporal(); + const MDNode *TBAAInfo = N->getTBAAInfo(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); EVT LoMemVT, HiMemVT; - GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - LoMemVT, isVol, isNT, Alignment); + LoMemVT, isVol, isNT, Alignment, TBAAInfo); else Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - isVol, isNT, Alignment); + isVol, isNT, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVol, isNT, Alignment); + HiMemVT, isVol, isNT, Alignment, TBAAInfo); else Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - isVol, isNT, Alignment); + isVol, isNT, Alignment, TBAAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // The input operands all must have the same type, and we know the result // type is valid. Convert this to a buildvector which extracts all the @@ -1284,7 +1323,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - Op, DAG.getIntPtrConstant(i))); + Op, DAG.getConstant(i, TLI.getVectorIdxTy()))); } } @@ -1327,15 +1366,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { // to split more than once. if (InElementSize <= OutElementSize * 2) return SplitVecOp_UnaryOp(N); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Extract the halves of the input via extract_subvector. - EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), NumElements/2); - SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getIntPtrConstant(0)); - SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getIntPtrConstant(NumElements/2)); + SDValue InLoVec, InHiVec; + llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); // Truncate them to 1/2 the element size. EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, @@ -1359,7 +1394,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { "Operand types must be vectors"); // The result has a legal vector type, but the input needs splitting. SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes; - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); GetSplitVector(N->getOperand(0), Lo0, Hi0); GetSplitVector(N->getOperand(1), Lo1, Hi1); unsigned PartElements = Lo0.getValueType().getVectorNumElements(); @@ -1377,7 +1412,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { // The result has a legal vector type, but the input needs splitting. EVT ResVT = N->getValueType(0); SDValue Lo, Hi; - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); GetSplitVector(N->getOperand(0), Lo, Hi); EVT InVT = Lo.getValueType(); @@ -1434,27 +1469,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; + case ISD::ADD: case ISD::AND: case ISD::BSWAP: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SUB: + case ISD::XOR: + Res = WidenVecRes_Binary(N); + break; + case ISD::FADD: case ISD::FCOPYSIGN: - case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: - case ISD::FREM: case ISD::FSUB: - case ISD::MUL: - case ISD::MULHS: - case ISD::MULHU: - case ISD::OR: + case ISD::FDIV: + case ISD::FREM: case ISD::SDIV: - case ISD::SREM: case ISD::UDIV: + case ISD::SREM: case ISD::UREM: - case ISD::SUB: - case ISD::XOR: - Res = WidenVecRes_Binary(N); + Res = WidenVecRes_BinaryCanTrap(N); break; case ISD::FPOWI: @@ -1495,6 +1534,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -1512,7 +1552,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { // Ternary op widening. - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -1522,8 +1562,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { + // Binary op widening for operations that can trap. unsigned Opcode = N->getOpcode(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; @@ -1562,9 +1611,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); Idx += NumElts; CurNumElts -= NumElts; @@ -1577,9 +1626,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, DAG.getIntPtrConstant(Idx)); + InOp1, DAG.getConstant(Idx, + TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, DAG.getIntPtrConstant(Idx)); + InOp2, DAG.getConstant(Idx, + TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); } @@ -1617,7 +1668,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); + ConcatOps[OpIdx], DAG.getConstant(i, + TLI.getVectorIdxTy())); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; @@ -1659,7 +1711,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InOp = N->getOperand(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); @@ -1705,7 +1757,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, - InOp, DAG.getIntPtrConstant(0)); + InOp, DAG.getConstant(0, + TLI.getVectorIdxTy())); // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); @@ -1720,7 +1773,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else @@ -1738,7 +1791,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); SDValue ShOp = N->getOperand(1); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); } SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { @@ -1757,14 +1810,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { if (ShVT != ShWidenVT) ShOp = ModifyToType(ShOp, ShWidenVT); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); } SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { // Unary op widening. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); } SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { @@ -1774,7 +1827,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { .getVectorElementType(), WidenVT.getVectorNumElements()); SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, WidenLHS, DAG.getValueType(ExtVT)); } @@ -1788,7 +1841,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { EVT InVT = InOp.getValueType(); EVT VT = N->getValueType(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: @@ -1868,19 +1921,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Build a vector with undefined for the new nodes. EVT VT = N->getValueType(0); - EVT EltVT = VT.getVectorElementType(); + + // Integer BUILD_VECTOR operands may be larger than the node's vector element + // type. The UNDEFs need to have the same type as the existing operands. + EVT EltVT = N->getOperand(0).getValueType(); unsigned NumElts = VT.getVectorNumElements(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end()); - NewOps.reserve(WidenNumElts); - for (unsigned i = NumElts; i < WidenNumElts; ++i) - NewOps.push_back(DAG.getUNDEF(EltVT)); + assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); + NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size()); } @@ -1888,7 +1943,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { EVT InVT = N->getOperand(0).getValueType(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned WidenNumElts = WidenVT.getVectorNumElements(); unsigned NumInElts = InVT.getVectorNumElements(); unsigned NumOperands = N->getNumOperands(); @@ -1946,7 +2001,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(j)); + DAG.getConstant(j, TLI.getVectorIdxTy())); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -1955,7 +2010,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue InOp = N->getOperand(0); SDValue RndOp = N->getOperand(3); SDValue SatOp = N->getOperand(4); @@ -2004,7 +2059,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { // Extract the input and convert the shorten input vector. InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2020,7 +2075,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2038,7 +2093,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp = N->getOperand(0); SDValue Idx = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); @@ -2063,7 +2118,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned i; for (i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(IdxVal+i)); + DAG.getConstant(IdxVal+i, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -2073,7 +2128,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(), + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), InOp.getValueType(), InOp, N->getOperand(1), N->getOperand(2)); } @@ -2096,7 +2151,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { if (LdChain.size() == 1) NewChain = LdChain[0]; else - NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other, + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, &LdChain[0], LdChain.size()); // Modified the chain - switch anything that used the old chain to use @@ -2108,7 +2163,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), WidenVT, N->getOperand(0)); } @@ -2132,14 +2187,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, Cond1, InOp1, InOp2); } SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(2)); SDValue InOp2 = GetWidenedVector(N->getOperand(3)); - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), InOp1.getValueType(), N->getOperand(0), N->getOperand(1), InOp1, InOp2, N->getOperand(4)); } @@ -2153,7 +2208,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT, + return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2, N->getOperand(2)); } @@ -2164,7 +2219,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned NumElts = VT.getVectorNumElements(); @@ -2208,7 +2263,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { InOp2.getValueType() == WidenInVT && "Input not widened to expected type!"); (void)WidenInVT; - return DAG.getNode(ISD::SETCC, N->getDebugLoc(), + return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2, N->getOperand(2)); } @@ -2277,7 +2332,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // into some scalar code and create a nasty build vector. EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) @@ -2290,7 +2345,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { for (unsigned i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(Opcode, dl, EltVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getIntPtrConstant(i))); + DAG.getConstant(i, TLI.getVectorIdxTy()))); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } @@ -2299,7 +2354,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT VT = N->getValueType(0); SDValue InOp = GetWidenedVector(N->getOperand(0)); EVT InWidenVT = InOp.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Check if we can convert between two legal vector types and extract. unsigned InWidenSize = InWidenVT.getSizeInBits(); @@ -2311,7 +2366,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); } } @@ -2324,7 +2379,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { // nasty build vector. EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(NumElts); @@ -2339,20 +2394,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(j)); + DAG.getConstant(j, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0), InOp, N->getOperand(1)); } SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0), InOp, N->getOperand(1)); } @@ -2370,14 +2425,14 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { if (StChain.size() == 1) return StChain[0]; else - return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(), + return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other,&StChain[0],StChain.size()); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // WARNING: In this code we widen the compare instruction with garbage. // This garbage may contain denormal floats which may be slow. Is this a real @@ -2385,8 +2440,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { // Get a new SETCC node to compare the newly widened operands. // Only some of the compared elements are legal. - EVT SVT = TLI.getSetCCResultType(InOp0.getValueType()); - SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(), + EVT SVT = TLI.getSetCCResultType(*DAG.getContext(), InOp0.getValueType()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), SVT, InOp0, InOp1, N->getOperand(2)); // Extract the needed results from the result vector. @@ -2394,7 +2449,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SVT.getVectorElementType(), N->getValueType(0).getVectorNumElements()); SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, - ResVT, WideSETCC, DAG.getIntPtrConstant(0)); + ResVT, WideSETCC, DAG.getConstant(0, + TLI.getVectorIdxTy())); return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -2465,9 +2521,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, // LDOps: Load operators to build a vector type // [Start,End) the list of loads to use. static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, - SmallVector<SDValue, 16>& LdOps, + SmallVectorImpl<SDValue> &LdOps, unsigned Start, unsigned End) { - DebugLoc dl = LdOps[Start].getDebugLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc dl(LdOps[Start]); EVT LdTy = LdOps[Start].getValueType(); unsigned Width = VecTy.getSizeInBits(); unsigned NumElts = Width / LdTy.getSizeInBits(); @@ -2487,12 +2544,12 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, LdTy = NewLdTy; } VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getIntPtrConstant(Idx++)); + DAG.getConstant(Idx++, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } -SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD) { // The strategy assumes that we can efficiently load powers of two widths. // The routines chops the vector into the largest vector loads with the same @@ -2501,7 +2558,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); unsigned WidenWidth = WidenVT.getSizeInBits(); EVT LdVT = LD->getMemoryVT(); - DebugLoc dl = LD->getDebugLoc(); + SDLoc dl(LD); assert(LdVT.isVector() && WidenVT.isVector()); assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); @@ -2512,6 +2569,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2521,7 +2579,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Align); + isVolatile, isNonTemporal, isInvariant, Align, + TBAAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2557,7 +2616,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, unsigned Increment = NewVTWidth / 8; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); SDValue L; if (LdWidth < NewVTWidth) { @@ -2566,7 +2625,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment)); + isNonTemporal, isInvariant, MinAlign(Align, Increment), + TBAAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector<SDValue, 16> Loads; @@ -2582,7 +2642,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment)); + isNonTemporal, isInvariant, MinAlign(Align, Increment), + TBAAInfo); LdChain.push_back(L.getValue(1)); } @@ -2646,14 +2707,14 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, } SDValue -DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, - LoadSDNode * LD, +DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, + LoadSDNode *LD, ISD::LoadExtType ExtType) { // For extension loads, it may not be more efficient to chop up the vector // and then extended it. Instead, we unroll the load and build a new vector. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); EVT LdVT = LD->getMemoryVT(); - DebugLoc dl = LD->getDebugLoc(); + SDLoc dl(LD); assert(LdVT.isVector() && WidenVT.isVector()); // Load information @@ -2662,6 +2723,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); @@ -2673,15 +2735,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), - LdEltVT, isVolatile, isNonTemporal, Align); + LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); + BasePtr, + DAG.getConstant(Offset, + BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - isVolatile, isNonTemporal, Align); + isVolatile, isNonTemporal, Align, TBAAInfo); LdChain.push_back(Ops[i].getValue(1)); } @@ -2694,7 +2758,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, } -void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, +void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // The strategy assumes that we can efficiently store powers of two widths. // The routines chops the vector into the largest vector stores with the same @@ -2704,8 +2768,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); - DebugLoc dl = ST->getDebugLoc(); + SDLoc dl(ST); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -2726,16 +2791,16 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, unsigned NumVTElts = NewVT.getVectorNumElements(); do { SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { // Cast the vector to the scalar type we can store @@ -2746,15 +2811,15 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getIntPtrConstant(Idx++)); + DAG.getConstant(Idx++, TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type Idx = Idx * NewVTWidth / ValEltWidth; @@ -2763,7 +2828,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, } void -DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, +DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // For extension loads, it may not be more efficient to truncate the vector // and then store it. Instead, we extract each element and then store it. @@ -2772,8 +2837,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); - DebugLoc dl = ST->getDebugLoc(); + SDLoc dl(ST); EVT StVT = ST->getMemoryVT(); EVT ValVT = ValOp.getValueType(); @@ -2791,20 +2857,22 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, - isVolatile, isNonTemporal, Align)); + isVolatile, isNonTemporal, Align, + TBAAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); + BasePtr, DAG.getConstant(Offset, + BasePtr.getValueType())); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); } } @@ -2816,7 +2884,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { EVT InVT = InOp.getValueType(); assert(InVT.getVectorElementType() == NVT.getVectorElementType() && "input and widen element type must match"); - DebugLoc dl = InOp.getDebugLoc(); + SDLoc dl(InOp); // Check if InOp already has the right width. if (InVT == NVT) @@ -2837,7 +2905,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); // Fall back to extract and build. SmallVector<SDValue, 16> Ops(WidenNumElts); @@ -2846,7 +2914,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 473e138..1dd2128 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -42,11 +42,11 @@ static cl::opt<signed> RegPressureThreshold( ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : Picker(this), - InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData()) + InstrItins(IS->getTargetLowering()->getTargetMachine().getInstrItineraryData()) { - TII = IS->getTargetLowering().getTargetMachine().getInstrInfo(); - TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo(); - TLI = &IS->getTargetLowering(); + TII = IS->getTargetLowering()->getTargetMachine().getInstrInfo(); + TRI = IS->getTargetLowering()->getTargetMachine().getRegisterInfo(); + TLI = IS->getTargetLowering(); const TargetMachine &tm = (*IS->MF).getTarget(); ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); @@ -389,10 +389,9 @@ signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { // Constants used to denote relative importance of // heuristic components for cost computation. static const unsigned PriorityOne = 200; -static const unsigned PriorityTwo = 100; -static const unsigned PriorityThree = 50; -static const unsigned PriorityFour = 15; -static const unsigned PriorityFive = 5; +static const unsigned PriorityTwo = 50; +static const unsigned PriorityThree = 15; +static const unsigned PriorityFour = 5; static const unsigned ScaleOne = 20; static const unsigned ScaleTwo = 10; static const unsigned ScaleThree = 5; @@ -449,7 +448,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { if (N->isMachineOpcode()) { const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); if (TID.isCall()) - ResCount += (PriorityThree + (ScaleThree*N->getNumValues())); + ResCount += (PriorityTwo + (ScaleThree*N->getNumValues())); } else switch (N->getOpcode()) { @@ -457,11 +456,11 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { case ISD::TokenFactor: case ISD::CopyFromReg: case ISD::CopyToReg: - ResCount += PriorityFive; + ResCount += PriorityFour; break; case ISD::INLINEASM: - ResCount += PriorityFour; + ResCount += PriorityThree; break; } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h deleted file mode 100644 index 7e7b897..0000000 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h +++ /dev/null @@ -1,56 +0,0 @@ -//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the SDNodeOrdering class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_SDNODEORDERING_H -#define LLVM_CODEGEN_SDNODEORDERING_H - -#include "llvm/ADT/DenseMap.h" - -namespace llvm { - -class SDNode; - -/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each -/// SDNode that roughly corresponds to the ordering of the original LLVM -/// instruction. This is used for turning off scheduling, because we'll forgo -/// the normal scheduling algorithms and output the instructions according to -/// this ordering. -class SDNodeOrdering { - DenseMap<const SDNode*, unsigned> OrderMap; - - void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; - SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; -public: - SDNodeOrdering() {} - - void add(const SDNode *Node, unsigned NewOrder) { - unsigned &OldOrder = OrderMap[Node]; - if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder)) - OldOrder = NewOrder; - } - void remove(const SDNode *Node) { - DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node); - if (Itr != OrderMap.end()) - OrderMap.erase(Itr); - } - void clear() { - OrderMap.clear(); - } - unsigned getOrder(const SDNode *Node) { - return OrderMap[Node]; - } -}; - -} // end llvm namespace - -#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index d1f36cb..6c5e0ab 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -102,8 +102,8 @@ private: void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, - SmallVector<SUnit*, 2>&); - bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + SmallVectorImpl<SUnit*>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); void ListScheduleBottomUp(); /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. @@ -387,7 +387,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, - SmallVector<SUnit*, 2> &Copies) { + SmallVectorImpl<SUnit*> &Copies) { SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; @@ -448,7 +448,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, std::vector<SUnit*> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, - SmallVector<unsigned, 4> &LRegs, + SmallVectorImpl<unsigned> &LRegs, const TargetRegisterInfo *TRI) { bool Added = false; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { @@ -467,7 +467,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, /// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, - SmallVector<unsigned, 4> &LRegs){ + SmallVectorImpl<unsigned> &LRegs){ if (NumLiveRegs == 0) return false; @@ -567,7 +567,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // "expensive to copy" values to break the dependency. In case even // that doesn't work, insert cross class copies. SUnit *TrySU = NotReady[0]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index c009cfc..1a562d7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -229,8 +229,8 @@ private: void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, - SmallVector<SUnit*, 2>&); - bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + SmallVectorImpl<SUnit*>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); void releaseInterferences(unsigned Reg = 0); @@ -718,7 +718,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); - // Reserve resources for the scheduled intruction. + // Reserve resources for the scheduled instruction. EmitNode(SU); Sequence.push_back(SU); @@ -1133,9 +1133,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { /// InsertCopiesAndMoveSuccs - Insert register copies and move all /// scheduled successors of the given SUnit to the last copy. void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - SmallVector<SUnit*, 2> &Copies) { + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVectorImpl<SUnit*> &Copies) { SUnit *CopyFromSU = CreateNewSUnit(NULL); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; @@ -1205,7 +1205,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, std::vector<SUnit*> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, - SmallVector<unsigned, 4> &LRegs, + SmallVectorImpl<unsigned> &LRegs, const TargetRegisterInfo *TRI) { for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { @@ -1227,7 +1227,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, std::vector<SUnit*> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, - SmallVector<unsigned, 4> &LRegs) { + SmallVectorImpl<unsigned> &LRegs) { // Look at all live registers. Skip Reg0 and the special CallResource. for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { if (!LiveRegDefs[i]) continue; @@ -1252,7 +1252,7 @@ static const uint32_t *getNodeRegMask(const SDNode *N) { /// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. bool ScheduleDAGRRList:: -DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { +DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { if (NumLiveRegs == 0) return false; @@ -1331,7 +1331,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { SUnit *SU = Interferences[i-1]; LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); if (Reg) { - SmallVector<unsigned, 4> &LRegs = LRegsPos->second; + SmallVectorImpl<unsigned> &LRegs = LRegsPos->second; if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) continue; } @@ -1385,7 +1385,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // to resolve it. for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { SUnit *TrySU = Interferences[i]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; // Try unscheduling up to the point where it's safe to schedule // this node. @@ -1433,7 +1433,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // insert cross class copies. // If it's not too expensive, i.e. cost != -1, issue copies. SUnit *TrySU = Interferences[0]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; @@ -1692,7 +1692,7 @@ public: unsigned getNodeOrdering(const SUnit *SU) const { if (!SU->getNode()) return 0; - return scheduleDAG->DAG->GetOrdering(SU->getNode()); + return SU->getNode()->getIROrder(); } bool empty() const { return Queue.empty(); } @@ -2401,7 +2401,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { bool RHasPhysReg = right->hasPhysRegDefs; if (LHasPhysReg != RHasPhysReg) { #ifndef NDEBUG - const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"}; + static const char *const PhysRegMsg[] = { " has no physreg", + " defines a physreg" }; #endif DEBUG(dbgs() << " SU (" << left->NodeNum << ") " << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " @@ -3013,7 +3014,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const TargetLowering *TLI = &IS->getTargetLowering(); + const TargetLowering *TLI = IS->getTargetLowering(); HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); @@ -3029,7 +3030,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS, const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const TargetLowering *TLI = &IS->getTargetLowering(); + const TargetLowering *TLI = IS->getTargetLowering(); ILPBURRPriorityQueue *PQ = new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index b22440d..054e3dd 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -690,21 +690,11 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { } #endif // NDEBUG -namespace { - struct OrderSorter { - bool operator()(const std::pair<unsigned, MachineInstr*> &A, - const std::pair<unsigned, MachineInstr*> &B) { - return A.first < B.first; - } - }; -} - /// ProcessSDDbgValues - Process SDDbgValues associated with this node. -static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, - InstrEmitter &Emitter, - SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, - DenseMap<SDValue, unsigned> &VRBaseMap, - unsigned Order) { +static void +ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, + DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) { if (!N->getHasDebugValue()) return; @@ -731,12 +721,12 @@ static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, // ProcessSourceNode - Process nodes with source order numbers. These are added // to a vector which EmitSchedule uses to determine how to insert dbg_value // instructions in the right order. -static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, - InstrEmitter &Emitter, - DenseMap<SDValue, unsigned> &VRBaseMap, - SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, - SmallSet<unsigned, 8> &Seen) { - unsigned Order = DAG->GetOrdering(N); +static void +ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + DenseMap<SDValue, unsigned> &VRBaseMap, + SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, + SmallSet<unsigned, 8> &Seen) { + unsigned Order = N->getIROrder(); if (!Order || !Seen.insert(Order)) { // Process any valid SDDbgValues even if node does not have any order // assigned. @@ -745,7 +735,10 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, } MachineBasicBlock *BB = Emitter.getBlock(); - if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() || + // Fast-isel may have inserted some instructions, in which case the + // BB->back().isPHI() test will not fire when we want it to. + prior(Emitter.getInsertPos())->isPHI()) { // Did not insert any instruction. Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); return; @@ -858,7 +851,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Sort the source order instructions and use the order to insert debug // values. - std::sort(Orders.begin(), Orders.end(), OrderSorter()); + std::sort(Orders.begin(), Orders.end(), less_first()); SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); @@ -883,7 +876,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Insert at the instruction, which may be in a different // block, if the block was split by a custom inserter. MachineBasicBlock::iterator Pos = MI; - MI->getParent()->insert(llvm::next(Pos), DbgMI); + MI->getParent()->insert(Pos, DbgMI); } } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 15235c8..45d5a4f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -13,7 +13,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeDbgValue.h" -#include "SDNodeOrdering.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -636,9 +635,6 @@ void SelectionDAG::DeallocateNode(SDNode *N) { NodeAllocator.Deallocate(AllNodes.remove(N)); - // Remove the ordering of this node. - Ordering->remove(N); - // If any of the SDDbgValue nodes refer to this SDNode, invalidate them. ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N); for (unsigned i = 0, e = DbgVals.size(); i != e; ++i) @@ -868,30 +864,30 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TLI.getDataLayout()->getABITypeAlignment(Ty); + return TM.getTargetLowering()->getDataLayout()->getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), - getVTList(MVT::Other)), - Root(getEntryNode()), Ordering(0), UpdateListeners(0) { + : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), + Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), + UpdateListeners(0) { AllNodes.push_back(&EntryNode); - Ordering = new SDNodeOrdering(); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) { +void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti, + const TargetLowering *tli) { MF = &mf; TTI = tti; + TLI = tli; Context = &mf.getFunction()->getContext(); } SelectionDAG::~SelectionDAG() { assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); allnodes_clear(); - delete Ordering; delete DbgInfo; } @@ -918,29 +914,28 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); - Ordering->clear(); DbgInfo->clear(); } -SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ANY_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::SIGN_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ZERO_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { assert(!VT.isVector() && "getZeroExtendInReg should use the vector element type instead of " "the vector type!"); @@ -954,7 +949,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). /// -SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) { +SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue NegOne = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); @@ -979,16 +974,66 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { EVT EltVT = VT.getScalarType(); const ConstantInt *Elt = &Val; + const TargetLowering *TLI = TM.getTargetLowering(); + // In some cases the vector type is legal but the element type is illegal and // needs to be promoted, for example v8i8 on ARM. In this case, promote the // inserted value (the type does not need to match the vector element type). // Any extra bits introduced will be truncated away. - if (VT.isVector() && TLI.getTypeAction(*getContext(), EltVT) == + if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == TargetLowering::TypePromoteInteger) { - EltVT = TLI.getTypeToTransformTo(*getContext(), EltVT); + EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); Elt = ConstantInt::get(*getContext(), NewVal); } + // In other cases the element type is illegal and needs to be expanded, for + // example v2i64 on MIPS32. In this case, find the nearest legal type, split + // the value into n parts and use a vector type with n-times the elements. + // Then bitcast to the type requested. + // Legalizing constants too early makes the DAGCombiner's job harder so we + // only legalize if the DAG tells us we must produce legal types. + else if (NewNodesMustHaveLegalTypes && VT.isVector() && + TLI->getTypeAction(*getContext(), EltVT) == + TargetLowering::TypeExpandInteger) { + APInt NewVal = Elt->getValue(); + EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); + unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; + EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); + + // Check the temporary vector is the correct size. If this fails then + // getTypeToTransformTo() probably returned a type whose size (in bits) + // isn't a power-of-2 factor of the requested type size. + assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); + + SmallVector<SDValue, 2> EltParts; + for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { + EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) + .trunc(ViaEltSizeInBits), + ViaEltVT, isT)); + } + + // EltParts is currently in little endian order. If we actually want + // big-endian order then reverse it now. + if (TLI->isBigEndian()) + std::reverse(EltParts.begin(), EltParts.end()); + + // The elements must be reversed when the element order is different + // to the endianness of the elements (because the BITCAST is itself a + // vector shuffle in this situation). However, we do not need any code to + // perform this reversal because getConstant() is producing a vector + // splat. + // This situation occurs in MIPS MSA. + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) + Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); + + SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT, + getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT, + &Ops[0], Ops.size())); + return Result; + } assert(Elt->getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"); @@ -1012,13 +1057,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { if (VT.isVector()) { SmallVector<SDValue, 8> Ops; Ops.assign(VT.getVectorNumElements(), Result); - Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size()); } return Result; } SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { - return getConstant(Val, TLI.getPointerTy(), isTarget); + return getConstant(Val, TM.getTargetLowering()->getPointerTy(), isTarget); } @@ -1054,8 +1099,8 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ if (VT.isVector()) { SmallVector<SDValue, 8> Ops; Ops.assign(VT.getVectorNumElements(), Result); - // FIXME DebugLoc info might be appropriate here - Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + // FIXME SDLoc info might be appropriate here + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size()); } return Result; } @@ -1077,15 +1122,16 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { llvm_unreachable("Unsupported type in getConstantFP"); } -SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); + const TargetLowering *TLI = TM.getTargetLowering(); // Truncate (with sign-extension) the offset value to the pointer size. - unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType()); if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); @@ -1112,7 +1158,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT, + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(), + DL.getDebugLoc(), GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -1161,7 +1208,8 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = + TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1188,7 +1236,8 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = + TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1299,13 +1348,10 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { } } -SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, +SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *Mask) { - assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); - assert(VT.isVector() && N1.getValueType().isVector() && - "Vector Shuffle VTs must be a vectors"); - assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() - && "Vector Shuffle VTs must have same element type"); + assert(VT == N1.getValueType() && VT == N2.getValueType() && + "Invalid VECTOR_SHUFFLE"); // Canonicalize shuffle undef, undef -> undef if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) @@ -1354,17 +1400,13 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, commuteShuffle(N1, N2, MaskVec); } - // If Identity shuffle, or all shuffle in to undef, return that node. - bool AllUndef = true; + // If Identity shuffle return that node. bool Identity = true; for (unsigned i = 0; i != NElts; ++i) { if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; - if (MaskVec[i] >= 0) AllUndef = false; } - if (Identity && NElts == N1.getValueType().getVectorNumElements()) + if (Identity && NElts) return N1; - if (AllUndef) - return getUNDEF(VT); FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; @@ -1383,13 +1425,15 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); ShuffleVectorSDNode *N = - new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); + new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), N1, N2, + MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, +SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, ISD::CvtCode Code) { @@ -1406,8 +1450,9 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5, - Code); + CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), + Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1441,7 +1486,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { return SDValue(N, 0); } -SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { +SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1); @@ -1450,7 +1495,8 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label); + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), + dl.getDebugLoc(), Root, Label); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1513,16 +1559,36 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { return SDValue(N, 0); } +/// getAddrSpaceCast - Return an AddrSpaceCastSDNode. +SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, + unsigned SrcAS, unsigned DestAS) { + SDValue Ops[] = {Ptr}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), &Ops[0], 1); + ID.AddInteger(SrcAS); + ID.AddInteger(DestAS); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), + dl.getDebugLoc(), + VT, Ptr, SrcAS, DestAS); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - EVT ShTy = TLI.getShiftAmountTy(LHSTy); + EVT ShTy = TM.getTargetLowering()->getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; - return getNode(Opcode, Op.getDebugLoc(), ShTy, Op); + return getNode(Opcode, SDLoc(Op), ShTy, Op); } /// CreateStackTemporary - Create a stack temporary, suitable for holding the @@ -1531,11 +1597,12 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); + const TargetLowering *TLI = TM.getTargetLowering(); unsigned StackAlign = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign); + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI.getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy()); } /// CreateStackTemporary - Create a stack temporary suitable for holding @@ -1545,24 +1612,30 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const DataLayout *TD = TLI.getDataLayout(); + const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout *TD = TLI->getDataLayout(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI.getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy()); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, - SDValue N2, ISD::CondCode Cond, DebugLoc dl) { + SDValue N2, ISD::CondCode Cond, SDLoc dl) { // These setcc operations always fold. switch (Cond) { default: break; case ISD::SETFALSE: case ISD::SETFALSE2: return getConstant(0, VT); case ISD::SETTRUE: - case ISD::SETTRUE2: return getConstant(1, VT); + case ISD::SETTRUE2: { + const TargetLowering *TLI = TM.getTargetLowering(); + TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector()); + return getConstant( + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + } case ISD::SETOEQ: case ISD::SETOGT: @@ -1644,7 +1717,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, } } else { // Ensure that the constant occurs on the RHS. - return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); + MVT CompVT = N1.getValueType().getSimpleVT(); + if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT)) + return SDValue(); + + return getSetCC(dl, VT, N2, N1, SwappedCond); } } @@ -1680,6 +1758,7 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// processing. void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { + const TargetLowering *TLI = TM.getTargetLowering(); unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. @@ -1802,7 +1881,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. - if (TLI.getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); return; @@ -1942,7 +2021,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); @@ -2054,7 +2132,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. @@ -2114,7 +2191,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); + TLI->computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); return; } } @@ -2125,6 +2202,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, /// information. For example, immediately after an "SRA X, 2", we know that /// the top 3 bits are all equal to each other, so we return 3. unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ + const TargetLowering *TLI = TM.getTargetLowering(); EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarType().getSizeInBits(); @@ -2149,7 +2227,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ } case ISD::SIGN_EXTEND: - Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + Tmp = + VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; case ISD::SIGN_EXTEND_INREG: @@ -2209,7 +2288,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If setcc returns 0/-1, all bits are sign bits. - if (TLI.getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; @@ -2310,7 +2389,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || Op.getOpcode() == ISD::INTRINSIC_VOID) { - unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, Depth); if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); } @@ -2403,14 +2482,15 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { /// getNode - Gets or creates the specified node. /// -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT)); + SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -2420,7 +2500,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue Operand) { // Constant fold unary operations with an integer constant operand. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { @@ -2671,10 +2751,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); } AllNodes.push_back(N); @@ -2789,11 +2871,11 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, return Outputs.back(); // Otherwise build a big vector out of the scalar elements we generated. - return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(), + return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs.data(), Outputs.size()); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); @@ -3072,9 +3154,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, if (VT.isSimple() && N1.getValueType().isSimple()) { assert(VT.isVector() && N1.getValueType().isVector() && "Extract subvector VTs must be a vectors!"); - assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && + assert(VT.getVectorElementType() == + N1.getValueType().getVectorElementType() && "Extract subvector VTs must have the same element type!"); - assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() && + assert(VT.getSimpleVT() <= N1.getSimpleValueType() && "Extract subvector must be from larger vector to smaller vector!"); if (isa<ConstantSDNode>(Index.getNode())) { @@ -3085,7 +3168,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, } // Trivial extraction. - if (VT.getSimpleVT() == N1.getValueType().getSimpleVT()) + if (VT.getSimpleVT() == N1.getSimpleValueType()) return N1; } break; @@ -3243,10 +3326,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2); } AllNodes.push_back(N); @@ -3256,11 +3341,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); switch (Opcode) { + case ISD::FMA: { + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); + if (N1CFP && N2CFP && N3CFP) { + APFloat V1 = N1CFP->getValueAPF(); + const APFloat &V2 = N2CFP->getValueAPF(); + const APFloat &V3 = N3CFP->getValueAPF(); + APFloat::opStatus s = + V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); + if (s != APFloat::opInvalidOp) + return getConstantFP(V1, VT); + } + break; + } case ISD::CONCAT_VECTORS: // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to // one big BUILD_VECTOR. @@ -3300,7 +3400,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, "Insert subvector VTs must be a vectors"); assert(VT == N1.getValueType() && "Dest and insert subvector source types must match!"); - assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() && + assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && "Insert subvector must be from smaller vector to larger vector!"); if (isa<ConstantSDNode>(Index.getNode())) { assert((N2.getValueType().getVectorNumElements() + @@ -3310,7 +3410,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } // Trivial insertion. - if (VT.getSimpleVT() == N2.getValueType().getSimpleVT()) + if (VT.getSimpleVT() == N2.getSimpleValueType()) return N2; } break; @@ -3333,10 +3433,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); } AllNodes.push_back(N); @@ -3346,14 +3448,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VT, Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; @@ -3379,14 +3481,14 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { ArgChains.push_back(SDValue(L, 1)); // Build a tokenfactor for all the chains. - return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, &ArgChains[0], ArgChains.size()); } /// getMemsetValue - Vectorized representation of the memset value /// operand. static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, - DebugLoc dl) { + SDLoc dl) { assert(Value.getOpcode() != ISD::UNDEF); unsigned NumBits = VT.getScalarType().getSizeInBits(); @@ -3412,7 +3514,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// getMemsetStringVal - Similar to getMemsetValue. Except this is only /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. -static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, +static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, const TargetLowering &TLI, StringRef Str) { // Handle vector with all elements zero. if (Str.empty()) { @@ -3454,10 +3556,10 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, /// getMemBasePlusOffset - Returns base and offset node for the /// -static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, +static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, SelectionDAG &DAG) { EVT VT = Base.getValueType(); - return DAG.getNode(ISD::ADD, Base.getDebugLoc(), + return DAG.getNode(ISD::ADD, dl, VT, Base, DAG.getConstant(Offset, VT)); } @@ -3585,7 +3687,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, return true; } -static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, +static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, bool isVol, @@ -3630,7 +3732,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack - // realignment. + // realignment. const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && @@ -3671,7 +3773,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); if (Value.getNode()) Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), + getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); } @@ -3685,11 +3787,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, - getMemBasePlusOffset(Src, SrcOff, DAG), + getMemBasePlusOffset(Src, SrcOff, dl, DAG), SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, MinAlign(SrcAlign, SrcOff)); Store = DAG.getTruncStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), + getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), VT, isVol, false, Align); } @@ -3703,7 +3805,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, &OutChains[0], OutChains.size()); } -static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, +static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, bool isVol, @@ -3755,10 +3857,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value, Store; + SDValue Value; Value = DAG.getLoad(VT, dl, Chain, - getMemBasePlusOffset(Src, SrcOff, DAG), + getMemBasePlusOffset(Src, SrcOff, dl, DAG), SrcPtrInfo.getWithOffset(SrcOff), isVol, false, false, SrcAlign); LoadValues.push_back(Value); @@ -3771,10 +3873,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value, Store; + SDValue Store; Store = DAG.getStore(Chain, dl, LoadValues[i], - getMemBasePlusOffset(Dst, DstOff, DAG), + getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); OutChains.push_back(Store); DstOff += VTSize; @@ -3784,7 +3886,25 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, &OutChains[0], OutChains.size()); } -static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, +/// \brief Lower the call to 'memset' intrinsic function into a series of store +/// operations. +/// +/// \param DAG Selection DAG where lowered code is placed. +/// \param dl Link to corresponding IR location. +/// \param Chain Control flow dependency. +/// \param Dst Pointer to destination memory location. +/// \param Src Value of byte to write into the memory. +/// \param Size Number of bytes to write. +/// \param Align Alignment of the destination in bytes. +/// \param isVol True if destination is volatile. +/// \param DstPtrInfo IR information on the memory pointer. +/// \returns New head in the control flow, if lowering was successful, empty +/// SDValue otherwise. +/// +/// The function tries to replace 'llvm.memset' intrinsic with several store +/// operations and value calculation code. This is usually profitable for small +/// memory size. +static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, bool isVol, @@ -3856,7 +3976,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, } assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), + getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); OutChains.push_back(Store); @@ -3868,7 +3988,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, &OutChains[0], OutChains.size()); } -SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, +SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, @@ -3914,29 +4034,31 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // beyond the given memory regions. But fixing this isn't easy, and most // people don't care. + const TargetLowering *TLI = TM.getTargetLowering(); + // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); - // FIXME: pass in DebugLoc + // FIXME: pass in SDLoc TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMCPY), + TLI->getLibcallCallingConv(RTLIB::MEMCPY), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } -SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, +SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo, @@ -3970,29 +4092,31 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. + const TargetLowering *TLI = TM.getTargetLowering(); + // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); - // FIXME: pass in DebugLoc + // FIXME: pass in SDLoc TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMMOVE), + TLI->getLibcallCallingConv(RTLIB::MEMMOVE), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } -SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, +SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { @@ -4023,7 +4147,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, return Result; // Emit a library call. - Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext()); + const TargetLowering *TLI = TM.getTargetLowering(); + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -4041,22 +4166,53 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, Entry.Ty = IntPtrTy; Entry.isSExt = false; Args.push_back(Entry); - // FIXME: pass in DebugLoc + // FIXME: pass in SDLoc TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMSET), + TLI->getLibcallCallingConv(RTLIB::MEMSET), /*isTailCall=*/false, /*doesNotReturn*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy()), Args, *this, dl); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTList, SDValue* Ops, unsigned NumOps, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + // Allocate the operands array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the allocator is released. + // If the number of operands is less than 5 we use AtomicSDNode's internal + // storage. + SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps) : 0; + + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, MemVT, + Ops, DynOps, NumOps, MMO, + Ordering, SynchScope); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, @@ -4084,7 +4240,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachineMemOperand *MMO, @@ -4096,25 +4252,11 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, EVT VT = Cmp.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 4); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, - Ptr, Cmp, Swp, MMO, Ordering, - SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value* PtrVal, @@ -4145,7 +4287,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO, @@ -4169,25 +4311,11 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) : getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 3); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, - Ptr, Val, MMO, - Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, const Value* PtrVal, @@ -4218,7 +4346,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO, @@ -4227,26 +4355,13 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 2); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, - Ptr, MMO, Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope); } /// getMergeValues - Create a MERGE_VALUES node from the given operands. SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, - DebugLoc dl) { + SDLoc dl) { if (NumOps == 1) return Ops[0]; @@ -4259,7 +4374,7 @@ SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, } SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps, EVT MemVT, MachinePointerInfo PtrInfo, @@ -4271,7 +4386,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, } SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, @@ -4294,7 +4409,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, } SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, EVT MemVT, MachineMemOperand *MMO) { assert((Opcode == ISD::INTRINSIC_VOID || @@ -4318,12 +4433,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, return SDValue(E, 0); } - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, - MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + NumOps, MemVT, MMO); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, - MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + NumOps, MemVT, MMO); } AllNodes.push_back(N); return SDValue(N, 0); @@ -4365,7 +4482,7 @@ static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, DebugLoc dl, SDValue Chain, + EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, @@ -4398,7 +4515,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, DebugLoc dl, SDValue Chain, + EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO) { if (VT == MemVT) { @@ -4437,14 +4554,15 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType, + SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, +SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, @@ -4457,7 +4575,15 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, TBAAInfo, Ranges); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, +SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, + SDValue Chain, SDValue Ptr, + MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + VT, MMO); +} + +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, @@ -4469,8 +4595,16 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, } +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, + SDValue Chain, SDValue Ptr, EVT MemVT, + MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, + MemVT, MMO); +} + SDValue -SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, +SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM) { LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); assert(LD->getOffset().getOpcode() == ISD::UNDEF && @@ -4481,7 +4615,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, false, LD->getAlignment()); } -SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { @@ -4508,7 +4642,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, return getStore(Chain, dl, Val, Ptr, MMO); } -SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); @@ -4527,14 +4661,15 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, - false, VT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, @@ -4561,7 +4696,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } -SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, EVT SVT, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); @@ -4595,15 +4730,16 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, - true, SVT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } SDValue -SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, +SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM) { StoreSDNode *ST = cast<StoreSDNode>(OrigStore); assert(ST->getOffset().getOpcode() == ISD::UNDEF && @@ -4619,7 +4755,8 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM, + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), ST->getMemOperand()); @@ -4628,7 +4765,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, return SDValue(N, 0); } -SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, +SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue SV, unsigned Align) { @@ -4636,7 +4773,7 @@ SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, const SDUse *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -4652,7 +4789,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return getNode(Opcode, DL, VT, &NewOps[0], NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, const SDValue *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -4694,10 +4831,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops, NumOps); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops, NumOps); } AllNodes.push_back(N); @@ -4707,14 +4846,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, ArrayRef<EVT> ResultTys, const SDValue *Ops, unsigned NumOps) { return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), Ops, NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps) { if (NumVTs == 1) @@ -4722,7 +4861,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, const SDValue *Ops, unsigned NumOps) { if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); @@ -4760,26 +4899,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return SDValue(E, 0); if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1]); } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], - Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); } else { - N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops, NumOps); } CSEMap.InsertNode(N, IP); } else { if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1]); } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], - Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); } else { - N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops, NumOps); } } AllNodes.push_back(N); @@ -4789,36 +4938,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) { +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { return getNode(Opcode, DL, VTList, 0, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1) { SDValue Ops[] = { N1 }; return getNode(Opcode, DL, VTList, Ops, 1); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2) { SDValue Ops[] = { N1, N2 }; return getNode(Opcode, DL, VTList, Ops, 2); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3) { SDValue Ops[] = { N1, N2, N3 }; return getNode(Opcode, DL, VTList, Ops, 3); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VTList, Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; @@ -4830,76 +4979,81 @@ SDVTList SelectionDAG::getVTList(EVT VT) { } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(2); - Array[0] = VT1; - Array[1] = VT2; - SDVTList Result = makeVTList(Array, 2); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(2U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(2); + Array[0] = VT1; + Array[1] = VT2; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && - I->VTs[2] == VT3) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(3); - Array[0] = VT1; - Array[1] = VT2; - Array[2] = VT3; - SDVTList Result = makeVTList(Array, 3); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(3U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && - I->VTs[2] == VT3 && I->VTs[3] == VT4) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(4); - Array[0] = VT1; - Array[1] = VT2; - Array[2] = VT3; - Array[3] = VT4; - SDVTList Result = makeVTList(Array, 4); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(4U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + ID.AddInteger(VT4.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(4); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Array[3] = VT4; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { - switch (NumVTs) { - case 0: llvm_unreachable("Cannot have nodes without results!"); - case 1: return getVTList(VTs[0]); - case 2: return getVTList(VTs[0], VTs[1]); - case 3: return getVTList(VTs[0], VTs[1], VTs[2]); - case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]); - default: break; + FoldingSetNodeID ID; + ID.AddInteger(NumVTs); + for (unsigned index = 0; index < NumVTs; index++) { + ID.AddInteger(VTs[index].getRawBits()); } - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) { - if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1]) - continue; - - if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2])) - return *I; + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(NumVTs); + std::copy(VTs, VTs + NumVTs, Array); + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); + VTListMap.InsertNode(Result, IP); } - - EVT *Array = Allocator.Allocate<EVT>(NumVTs); - std::copy(VTs, VTs+NumVTs, Array); - SDVTList Result = makeVTList(Array, NumVTs); - VTList.push_back(Result); - return Result; + return Result->getSDVTList(); } @@ -5138,17 +5292,21 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, return N; } -/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away /// the line number information on the merged node since it is not possible to /// preserve the information that operation is associated with multiple lines. /// This will make the debugger working better at -O0, were there is a higher /// probability having other instructions associated with that line. /// -SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) { +/// For IROrder, we keep the smaller of the two +SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { DebugLoc NLoc = N->getDebugLoc(); - if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) { + if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && + (OLoc.getDebugLoc() != NLoc)) { N->setDebugLoc(DebugLoc()); } + unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); + N->setIROrder(Order); return N; } @@ -5157,7 +5315,7 @@ SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) { /// /// Note that MorphNodeTo returns the resultant node. If there is already a /// node of the specified opcode and operands, it returns that node instead of -/// the current one. Note that the DebugLoc need not be the same. +/// the current one. Note that the SDLoc need not be the same. /// /// Using MorphNodeTo is faster than creating a new node and swapping it in /// with ReplaceAllUsesWith both because it often avoids allocating a new @@ -5173,7 +5331,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) - return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc()); + return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); } if (!RemoveNodeFromCSEMaps(N)) @@ -5250,20 +5408,20 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, /// node of the specified opcode and operands, it returns that node instead of /// the current one. MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT) { SDVTList VTs = getVTList(VT); return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; @@ -5271,7 +5429,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; @@ -5279,20 +5437,20 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT); return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; @@ -5300,7 +5458,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; @@ -5308,7 +5466,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); @@ -5317,7 +5475,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2); @@ -5325,7 +5483,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -5334,7 +5492,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -5343,7 +5501,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -5351,7 +5509,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, EVT VT4, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); @@ -5359,7 +5517,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); @@ -5367,7 +5525,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, ArrayRef<SDValue> OpsArray) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; @@ -5380,12 +5538,13 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL)); + return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); } } // Allocate a new MachineSDNode. - N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs); + N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs); // Initialize the operands list. if (NumOps > array_lengthof(N->LocalOperands)) @@ -5411,7 +5570,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, /// getTargetExtractSubreg - A convenience function for creating /// TargetOpcode::EXTRACT_SUBREG nodes. SDValue -SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, +SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand) { SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, @@ -5422,7 +5581,7 @@ SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, /// getTargetInsertSubreg - A convenience function for creating /// TargetOpcode::INSERT_SUBREG nodes. SDValue -SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, +SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand, SDValue Subreg) { SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, @@ -5845,18 +6004,6 @@ unsigned SelectionDAG::AssignTopologicalOrder() { return DAGSize; } -/// AssignOrdering - Assign an order to the SDNode. -void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) { - assert(SD && "Trying to assign an order to a null node!"); - Ordering->add(SD, Order); -} - -/// GetOrdering - Get the order for the SDNode. -unsigned SelectionDAG::GetOrdering(const SDNode *SD) const { - assert(SD && "Trying to get the order of a null node!"); - return Ordering->getOrder(SD); -} - /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD. void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { @@ -5883,7 +6030,7 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { ClonedDVs.push_back(Clone); } } - for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(), + for (SmallVectorImpl<SDDbgValue *>::iterator I = ClonedDVs.begin(), E = ClonedDVs.end(); I != E; ++I) AddDbgValue(*I, ToNode, false); } @@ -5896,16 +6043,22 @@ HandleSDNode::~HandleSDNode() { DropOperands(); } -GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL, - const GlobalValue *GA, +GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, + DebugLoc DL, const GlobalValue *GA, EVT VT, int64_t o, unsigned char TF) - : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { + : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } -MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, - MachineMemOperand *mmo) - : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { +AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT, + SDValue X, unsigned SrcAS, + unsigned DestAS) + : UnarySDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT), X), + SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {} + +MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, + EVT memvt, MachineMemOperand *mmo) + : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); @@ -5914,10 +6067,10 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } -MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, +MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, const SDValue *Ops, unsigned NumOps, EVT memvt, MachineMemOperand *mmo) - : SDNode(Opc, dl, VTs, Ops, NumOps), + : SDNode(Opc, Order, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); @@ -6064,9 +6217,10 @@ bool SDNode::hasPredecessor(const SDNode *N) const { return hasPredecessorHelper(N, Visited, Worklist); } -bool SDNode::hasPredecessorHelper(const SDNode *N, - SmallPtrSet<const SDNode *, 32> &Visited, - SmallVector<const SDNode *, 16> &Worklist) const { +bool +SDNode::hasPredecessorHelper(const SDNode *N, + SmallPtrSet<const SDNode *, 32> &Visited, + SmallVectorImpl<const SDNode *> &Worklist) const { if (Visited.empty()) { Worklist.push_back(this); } else { @@ -6103,7 +6257,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT VT = N->getValueType(0); unsigned NE = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SmallVector<SDValue, 8> Scalars; SmallVector<SDValue, 4> Operands(N->getNumOperands()); @@ -6121,11 +6275,12 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { // A vector operand; extract a single element. + const TargetLowering *TLI = TM.getTargetLowering(); EVT OperandEltVT = OperandVT.getVectorElementType(); Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, TLI.getPointerTy())); + getConstant(i, TLI->getVectorIdxTy())); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -6147,8 +6302,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { case ISD::ROTL: case ISD::ROTR: Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], - getShiftAmountOperand(Operands[0].getValueType(), - Operands[1]))); + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); break; case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: { @@ -6203,8 +6358,9 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, const GlobalValue *GV2 = NULL; int64_t Offset1 = 0; int64_t Offset2 = 0; - bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); - bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + const TargetLowering *TLI = TM.getTargetLowering(); + bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) return Offset1 == (Offset2 + Dist*Bytes); return false; @@ -6217,11 +6373,12 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. const GlobalValue *GV; int64_t GVOffset = 0; - if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { + unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI.getDataLayout()); + TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) @@ -6251,6 +6408,38 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { return 0; } +/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type +/// which is split (or expanded) into two not necessarily identical pieces. +std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { + // Currently all types are split in half. + EVT LoVT, HiVT; + if (!VT.isVector()) { + LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); + } else { + unsigned NumElements = VT.getVectorNumElements(); + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), + NumElements/2); + } + return std::make_pair(LoVT, HiVT); +} + +/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the +/// low/high part. +std::pair<SDValue, SDValue> +SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, + const EVT &HiVT) { + assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <= + N.getValueType().getVectorNumElements() && + "More vector elements requested than available!"); + SDValue Lo, Hi; + Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, + getConstant(0, TLI->getVectorIdxTy())); + Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, + getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy())); + return std::make_pair(Lo, Hi); +} + // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); @@ -6372,7 +6561,7 @@ static void checkForCyclesHelper(const SDNode *N, void llvm::checkForCycles(const llvm::SDNode *N) { #ifdef XDEBUG - assert(N && "Checking nonexistant SDNode"); + assert(N && "Checking nonexistent SDNode"); SmallPtrSet<const SDNode*, 32> visited; SmallPtrSet<const SDNode*, 32> checked; checkForCyclesHelper(N, visited, checked); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 194aba8..2b2713d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,6 +15,7 @@ #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -32,6 +33,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -48,7 +50,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/IntegersSubsetMapping.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" @@ -57,6 +58,7 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include <algorithm> using namespace llvm; @@ -87,7 +89,7 @@ LimitFPPrecision("limit-float-precision", // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; -static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V); @@ -96,7 +98,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, /// larger then ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). -static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, +static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, @@ -217,7 +219,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, /// type larger then ValueVT then AssertOp can be used to specify whether the /// extra bits are known to be zero (ISD::AssertZext) or sign extended from /// ValueVT (ISD::AssertSext). -static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); @@ -280,7 +282,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); } // Vector/Vector bitcast. @@ -327,14 +329,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } -static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, +static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. -static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, +static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { @@ -466,7 +468,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, /// getCopyToPartsVector - Create a series of nodes that contain the specified /// value split into legal parts. -static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, +static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); @@ -489,7 +491,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, SmallVector<SDValue, 16> Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getIntPtrConstant(i))); + ElementVT, Val, DAG.getConstant(i, + TLI.getVectorIdxTy()))); for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) @@ -515,7 +518,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getIntPtrConstant(0)); + PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -545,10 +548,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); + DAG.getConstant(i * (NumElements / NumIntermediates), + TLI.getVectorIdxTy())); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - IntermediateVT, Val, DAG.getIntPtrConstant(i)); + IntermediateVT, Val, + DAG.getConstant(i, TLI.getVectorIdxTy())); } // Split the intermediate operands into legal parts. @@ -644,7 +649,7 @@ namespace { /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - DebugLoc dl, + SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V = 0) const; @@ -652,7 +657,7 @@ namespace { /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. - void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node @@ -671,7 +676,7 @@ namespace { /// If the Flag pointer is NULL, no flag is used. SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - DebugLoc dl, + SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. @@ -717,6 +722,14 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, unsigned NumSignBits = LOI->NumSignBits; unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + if (NumZeroBits == RegSize) { + // The current value is a zero. + // Explicitly express that as it would be easier for + // optimizations to kick in. + Parts[i] = DAG.getConstant(0, RegisterVT); + continue; + } + // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. bool isSExt = true; @@ -761,7 +774,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. -void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -869,7 +882,7 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - CurDebugLoc = DebugLoc(); + CurInst = NULL; HasTailCall = false; } @@ -900,7 +913,7 @@ SDValue SelectionDAGBuilder::getRoot() { } // Otherwise, we have to make a token factor node. - SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &PendingLoads[0], PendingLoads.size()); PendingLoads.clear(); DAG.setRoot(Root); @@ -930,7 +943,7 @@ SDValue SelectionDAGBuilder::getControlRoot() { PendingExports.push_back(Root); } - Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &PendingExports[0], PendingExports.size()); PendingExports.clear(); @@ -938,27 +951,21 @@ SDValue SelectionDAGBuilder::getControlRoot() { return Root; } -void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { - if (DAG.GetOrdering(Node) != 0) return; // Already has ordering. - DAG.AssignOrdering(Node, SDNodeOrder); - - for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) - AssignOrderingToNode(Node->getOperand(I).getNode()); -} - void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. if (isa<TerminatorInst>(&I)) HandlePHINodesInSuccessorBlocks(I.getParent()); - CurDebugLoc = I.getDebugLoc(); + ++SDNodeOrder; + + CurInst = &I; visit(I.getOpcode(), I); if (!isa<TerminatorInst>(&I) && !HasTailCall) CopyToExportRegsIfNeeded(&I); - CurDebugLoc = DebugLoc(); + CurInst = NULL; } void SelectionDAGBuilder::visitPHI(const PHINode &) { @@ -975,12 +982,6 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; #include "llvm/IR/Instruction.def" } - - // Assign the ordering to the freshly created DAG nodes. - if (NodeMap.count(&I)) { - ++SDNodeOrder; - AssignOrderingToNode(getValue(&I).getNode()); - } } // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, @@ -1002,7 +1003,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DAG.AddDbgValue(SDV, Val.getNode(), false); } } else - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -1020,9 +1021,10 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), + InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); resolveDanglingDebugInfo(V, N); return N; } @@ -1051,17 +1053,21 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { /// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { + const TargetLowering *TLI = TM.getTargetLowering(); + if (const Constant *C = dyn_cast<Constant>(V)) { - EVT VT = TLI.getValueType(V->getType(), true); + EVT VT = TLI->getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) return DAG.getConstant(*CI, VT); if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) - return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT); + return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); - if (isa<ConstantPointerNull>(C)) - return DAG.getConstant(0, TLI.getPointerTy()); + if (isa<ConstantPointerNull>(C)) { + unsigned AS = V->getType()->getPointerAddressSpace(); + return DAG.getConstant(0, TLI->getPointerTy(AS)); + } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return DAG.getConstantFP(*CFP, VT); @@ -1090,9 +1096,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } return DAG.getMergeValues(&Constants[0], Constants.size(), - getCurDebugLoc()); + getCurSDLoc()); } - + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) { SmallVector<SDValue, 4> Ops; @@ -1105,8 +1111,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } if (isa<ArrayType>(CDS->getType())) - return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + return DAG.getMergeValues(&Ops[0], Ops.size(), getCurSDLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, &Ops[0], Ops.size()); } @@ -1115,7 +1121,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { "Unknown struct or array constant!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, C->getType(), ValueVTs); + ComputeValueVTs(*TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct @@ -1131,7 +1137,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } return DAG.getMergeValues(&Constants[0], NumElts, - getCurDebugLoc()); + getCurSDLoc()); } if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) @@ -1148,7 +1154,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); - EVT EltVT = TLI.getValueType(VecTy->getElementType()); + EVT EltVT = TLI->getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -1159,7 +1165,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } // Create a BUILD_VECTOR node. - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, &Ops[0], Ops.size()); } @@ -1169,21 +1175,22 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + return DAG.getFrameIndex(SI->second, TLI->getPointerTy()); } // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast<Instruction>(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); - RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); } llvm_unreachable("Can't get register for value!"); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + const TargetLowering *TLI = TM.getTargetLowering(); SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; SmallVector<SDValue, 8> OutVals; @@ -1196,7 +1203,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); @@ -1204,26 +1211,26 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), + SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i])); Chains[i] = - DAG.getStore(Chain, getCurDebugLoc(), + DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), // FIXME: better loc info would be nice. Add, MachinePointerInfo(), false, false, 0); } - Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], NumValues); } else if (I.getNumOperands() != 0) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); @@ -1241,12 +1248,12 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ExtendKind = ISD::ZERO_EXTEND; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); + VT = TLI->getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); - MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT); + MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); - getCopyToParts(DAG, getCurDebugLoc(), + getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), &Parts[0], NumParts, PartVT, &I, ExtendKind); @@ -1264,7 +1271,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), - /*isfixed=*/true, 0, 0)); + VT, /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } } @@ -1274,8 +1281,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); - Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, - Outs, OutVals, getCurDebugLoc(), DAG); + Chain = TM.getTargetLowering()->LowerReturn(Chain, CallConv, isVarArg, + Outs, OutVals, getCurSDLoc(), + DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1474,7 +1482,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, /// If we should emit this as a bunch of and/or'd together conditions, return /// false. bool -SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ +SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) { if (Cases.size() != 2) return true; // If this is two comparisons of the same values or'd or and'd together, they @@ -1519,7 +1527,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // If this is not a fall-through branch, emit the branch. if (Succ0MBB != NextBlock) - DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1548,7 +1556,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - if (!TLI.isJumpExpensive() && + if (!TM.getTargetLowering()->isJumpExpensive() && BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { @@ -1596,7 +1604,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); // Build the setcc now. if (CB.CmpMHS == NULL) { @@ -1612,18 +1620,17 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); } else { - assert(CB.CC == ISD::SETCC_INVALID && - "Condition is undefined for to-the-range belonging check."); + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); - - if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) { + + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), - ISD::SETULE); + ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, VT, CmpOp, DAG.getConstant(Low, VT)); @@ -1671,11 +1678,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = TLI.getPointerTy(); - SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + EVT PTy = TM.getTargetLowering()->getPointerTy(); + SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); - SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(), + SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(), MVT::Other, Index.getValue(1), Table, Index); DAG.setRoot(BrJumpTable); @@ -1691,7 +1698,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // difference between smallest and largest cases. SDValue SwitchOp = getValue(JTH.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, DAG.getConstant(JTH.First, VT)); // The SDNode we just created, which holds the value being switched on minus @@ -1699,19 +1706,22 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); + const TargetLowering *TLI = TM.getTargetLowering(); + SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy()); - unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the largest // case in the switch. - SDValue CMP = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(Sub.getValueType()), Sub, - DAG.getConstant(JTH.Last-JTH.First,VT), + SDValue CMP = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, + DAG.getConstant(JTH.Last - JTH.First,VT), ISD::SETUGT); // Set NextBlock to be the MBB immediately after the current one, if any. @@ -1722,17 +1732,88 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); if (JT.MBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, + BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); DAG.setRoot(BrCond); } +/// Codegen a new tail for a stack protector check ParentMBB which has had its +/// tail spliced into a stack protector check success bb. +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. +void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB) { + + // First create the loads to the guard/stack slot for the comparison. + const TargetLowering *TLI = TM.getTargetLowering(); + EVT PtrTy = TLI->getPointerTy(); + + MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); + int FI = MFI->getStackProtectorIndex(); + + const Value *IRGuard = SPD.getGuard(); + SDValue GuardPtr = getValue(IRGuard); + SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); + + unsigned Align = + TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + GuardPtr, MachinePointerInfo(IRGuard, 0), + true, false, false, Align); + + SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + StackSlotPtr, + MachinePointerInfo::getFixedStack(FI), + true, false, false, Align); + + // Perform the comparison via a subtract/getsetcc. + EVT VT = Guard.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); + + SDValue Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, VT), + ISD::SETNE); + + // If the sub is not 0, then we know the guard/stackslot do not equal, so + // branch to failure MBB. + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + MVT::Other, StackSlot.getOperand(0), + Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); + // Otherwise branch to success MBB. + SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, BrCond, + DAG.getBasicBlock(SPD.getSuccessMBB())); + + DAG.setRoot(Br); +} + +/// Codegen the failure basic block for a stack protector check. +/// +/// A failure stack protector machine basic block consists simply of a call to +/// __stack_chk_fail(). +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. +void +SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { + const TargetLowering *TLI = TM.getTargetLowering(); + SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, + MVT::isVoid, 0, 0, false, getCurSDLoc(), + false, false).second; + DAG.setRoot(Chain); +} + /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, @@ -1740,18 +1821,20 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, DAG.getConstant(B.First, VT)); // Check range - SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(Sub.getValueType()), + const TargetLowering *TLI = TM.getTargetLowering(); + SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; - if (!TLI.isTypeLegal(VT)) + if (!TLI->isTypeLegal(VT)) UsePtrType = true; else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) @@ -1763,13 +1846,13 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } } if (UsePtrType) { - VT = TLI.getPointerTy(); - Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT); + VT = TLI->getPointerTy(); + Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); } B.RegVT = VT.getSimpleVT(); B.Reg = FuncInfo.CreateReg(B.RegVT); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), B.Reg, Sub); // Set NextBlock to be the MBB immediately after the current one, if any. @@ -1784,12 +1867,12 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, addSuccessorWithWeight(SwitchBB, B.Default); addSuccessorWithWeight(SwitchBB, MBB); - SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); if (MBB != NextBlock) - BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, + BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, DAG.getBasicBlock(MBB)); DAG.setRoot(BrRange); @@ -1803,35 +1886,36 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, BitTestCase &B, MachineBasicBlock *SwitchBB) { MVT VT = BB.RegVT; - SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), Reg, VT); SDValue Cmp; unsigned PopCount = CountPopulation_64(B.Mask); + const TargetLowering *TLI = TM.getTargetLowering(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. - Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(VT), + Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), + DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. - Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(VT), + Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), VT), ShiftOp, DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); } else { // Make desired shift - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, DAG.getConstant(1, VT), ShiftOp); // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), VT, SwitchVal, DAG.getConstant(B.Mask, VT)); - Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(VT), + Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), VT), AndOp, DAG.getConstant(0, VT), ISD::SETNE); } @@ -1841,7 +1925,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); - SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); @@ -1853,7 +1937,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, NextBlock = BBI; if (NextMBB != NextBlock) - BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, + BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); DAG.setRoot(BrAnd); @@ -1885,7 +1969,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { addSuccessorWithWeight(InvokeMBB, LandingPad); // Drop into normal successor. - DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Return))); } @@ -1904,28 +1988,29 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. - if (TLI.getExceptionPointerRegister() == 0 && - TLI.getExceptionSelectorRegister() == 0) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getExceptionPointerRegister() == 0 && + TLI->getExceptionSelectorRegister() == 0) return; SmallVector<EVT, 2> ValueVTs; - ComputeValueVTs(TLI, LP.getType(), ValueVTs); + ComputeValueVTs(*TLI, LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been // copied into virtual registers. SDValue Ops[2]; Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurDebugLoc(), - FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), - getCurDebugLoc(), ValueVTs[0]); + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()), + getCurSDLoc(), ValueVTs[0]); Ops[1] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurDebugLoc(), - FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), - getCurDebugLoc(), ValueVTs[1]); + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()), + getCurSDLoc(), ValueVTs[1]); // Merge into one. - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&ValueVTs[0], ValueVTs.size()), &Ops[0], 2); setValue(&LP, Res); @@ -1979,7 +2064,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, SDValue CondLHS = getValue(SV); EVT VT = CondLHS.getValueType(); - DebugLoc DL = getCurDebugLoc(); + SDLoc DL = getCurSDLoc(); SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, DAG.getConstant(CommonBit, VT)); @@ -2030,12 +2115,11 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, // The last case block won't fall through into 'NextBlock' if we emit the // branches in this order. See if rearranging a case value would help. // We start at the bottom as it's the case with the least weight. - for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){ + for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) if (I->BB == NextBlock) { std::swap(*I, BackCase); break; } - } } // Create a CaseBlock record representing a conditional branch to @@ -2062,7 +2146,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CC = ISD::SETEQ; LHS = SV; RHS = I->High; MHS = NULL; } else { - CC = ISD::SETCC_INVALID; + CC = ISD::SETLE; LHS = I->Low; MHS = SV; RHS = I->High; } @@ -2096,7 +2180,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) { static APInt ComputeRange(const APInt &First, const APInt &Last) { uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth); + APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); return (LastExt - FirstExt + 1ULL); } @@ -2116,7 +2200,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) + const TargetLowering *TLI = TM.getTargetLowering(); + if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2162,7 +2247,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); const APInt &High = cast<ConstantInt>(I->High)->getValue(); - if (Low.ule(TEI) && TEI.ule(High)) { + if (Low.sle(TEI) && TEI.sle(High)) { DestBBs.push_back(I->BB); if (TEI==High) ++I; @@ -2177,7 +2262,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = DestWeights.find(I->BB); - if (Itr != DestWeights.end()) + if (Itr != DestWeights.end()) Itr->second += I->ExtraWeight; else DestWeights[I->BB] = I->ExtraWeight; @@ -2197,7 +2282,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } // Create a jump table index for this jump table. - unsigned JTEncoding = TLI.getJumpTableEncoding(); + unsigned JTEncoding = TLI->getJumpTableEncoding(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) ->createJumpTableIndex(DestBBs); @@ -2217,8 +2302,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock* Default, + MachineBasicBlock* SwitchBB) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; @@ -2282,7 +2367,9 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, LSize += J->size(); RSize -= J->size(); } - if (areJTsAllowed(TLI)) { + + const TargetLowering *TLI = TM.getTargetLowering(); + if (areJTsAllowed(*TLI)) { // If our case is dense we *really* should handle it earlier! assert((FMetric > 0) && "Should handle dense range earlier!"); } else { @@ -2334,7 +2421,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to LHS. - CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); if (CR.CaseBB == SwitchBB) visitSwitchCase(CB, SwitchBB); @@ -2351,8 +2438,9 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, MachineBasicBlock* Default, - MachineBasicBlock *SwitchBB){ - EVT PTy = TLI.getPointerTy(); + MachineBasicBlock* SwitchBB) { + const TargetLowering *TLI = TM.getTargetLowering(); + EVT PTy = TLI->getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; @@ -2363,7 +2451,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) + if (!TLI->isOperationLegal(ISD::SHL, PTy)) return false; size_t numCmps = 0; @@ -2406,7 +2494,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, // Optimize the case where all the case values fit in a // word without having to subtract minValue. In this case, // we can optimize away the subtraction. - if (maxValue.ult(IntPtrBits)) { + if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { cmpRange = maxValue; } else { lowBound = minValue; @@ -2481,12 +2569,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, /// Clusterify - Transform simple list of Cases into list of CaseRange's size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const SwitchInst& SI) { - - /// Use a shorter form of declaration, and also - /// show the we want to use CRSBuilder as Clusterifier. - typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier; - - Clusterifier TheClusterifier; + size_t numCmps = 0; BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases @@ -2495,27 +2578,40 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB, - BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); - } - - TheClusterifier.optimize(); - - size_t numCmps = 0; - for (Clusterifier::RangeIterator i = TheClusterifier.begin(), - e = TheClusterifier.end(); i != e; ++i, ++numCmps) { - Clusterifier::Cluster &C = *i; - // Update edge weight for the cluster. - unsigned W = C.first.Weight; - - // FIXME: Currently work with ConstantInt based numbers. - // Changing it to APInt based is a pretty heavy for this commit. - Cases.push_back(Case(C.first.getLow().toConstantInt(), - C.first.getHigh().toConstantInt(), C.second, W)); - - if (C.first.getLow() != C.first.getHigh()) - // A range counts double, since it requires two compares. - ++numCmps; + uint32_t ExtraWeight = + BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0; + + Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), + SMBB, ExtraWeight)); + } + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size() >= 2) + // Must recompute end() each iteration because it may be + // invalidated by erase if we hold on to it + for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + J != Cases.end(); ) { + const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); + const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); + MachineBasicBlock* nextBB = J->BB; + MachineBasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { + I->High = J->High; + I->ExtraWeight += J->ExtraWeight; + J = Cases.erase(J); + } else { + I = J++; + } + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; } return numCmps; @@ -2549,7 +2645,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // If this is not a fall-through branch, emit the branch. SwitchMBB->addSuccessor(Default); if (Default != NextBlock) - DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Default))); @@ -2616,7 +2712,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { addSuccessorWithWeight(IndirectBrMBB, Succ); } - DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), MVT::Other, getControlRoot(), getValue(I.getAddress()))); } @@ -2627,7 +2723,7 @@ void SelectionDAGBuilder::visitFSub(const User &I) { if (isa<Constant>(I.getOperand(0)) && I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), Op2.getValueType(), Op2)); return; } @@ -2638,7 +2734,7 @@ void SelectionDAGBuilder::visitFSub(const User &I) { void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), + setValue(&I, DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), Op1, Op2)); } @@ -2646,13 +2742,13 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = TM.getTargetLowering()->getShiftAmountTy(Op2.getValueType()); // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { unsigned ShiftSize = ShiftTy.getSizeInBits(); unsigned Op2Size = Op2.getValueType().getSizeInBits(); - DebugLoc DL = getCurDebugLoc(); + SDLoc DL = getCurSDLoc(); // If the operand is smaller than the shift count type, promote it. if (ShiftSize > Op2Size) @@ -2670,7 +2766,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); } - setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2)); } @@ -2684,9 +2780,10 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && !isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) - setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG)); + setValue(&I, TM.getTargetLowering()->BuildExactSDIV(Op1, Op2, + getCurSDLoc(), DAG)); else - setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(), + setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, Op2)); } @@ -2700,8 +2797,8 @@ void SelectionDAGBuilder::visitICmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } void SelectionDAGBuilder::visitFCmp(const User &I) { @@ -2715,13 +2812,13 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { ISD::CondCode Condition = getFCmpCondCode(predicate); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); + ComputeValueVTs(*TM.getTargetLowering(), I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2733,7 +2830,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { ISD::VSELECT : ISD::SELECT; for (unsigned i = 0; i != NumValues; ++i) - Values[i] = DAG.getNode(OpCode, getCurDebugLoc(), + Values[i] = DAG.getNode(OpCode, getCurSDLoc(), TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), Cond, SDValue(TrueVal.getNode(), @@ -2741,7 +2838,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { SDValue(FalseVal.getNode(), FalseVal.getResNo() + i)); - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&ValueVTs[0], NumValues), &Values[0], NumValues)); } @@ -2749,117 +2846,134 @@ void SelectionDAGBuilder::visitSelect(const User &I) { void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), + const TargetLowering *TLI = TM.getTargetLowering(); + EVT DestVT = TLI->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, - DAG.getTargetConstant(0, TLI.getPointerTy()))); + DAG.getTargetConstant(0, TLI->getPointerTy()))); } -void SelectionDAGBuilder::visitFPExt(const User &I){ +void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitSIToFP(const User &I){ +void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. if (DestVT != N.getValueType()) - setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), DestVT, N)); // convert types. else setValue(&I, N); // noop cast. } +void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const Value *SV = I.getOperand(0); + SDValue N = getValue(SV); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + + unsigned SrcAS = SV->getType()->getPointerAddressSpace(); + unsigned DestAS = I.getType()->getPointerAddressSpace(); + + if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); + + setValue(&I, N); +} + void SelectionDAGBuilder::visitInsertElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), - getValue(I.getOperand(2))); - setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), - TLI.getValueType(I.getType()), + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), + getCurSDLoc(), TLI.getVectorIdxTy()); + setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), + TM.getTargetLowering()->getValueType(I.getType()), InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), - getValue(I.getOperand(1))); - setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - TLI.getValueType(I.getType()), InVec, InIdx)); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), + getCurSDLoc(), TLI.getVectorIdxTy()); + setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), + TM.getTargetLowering()->getValueType(I.getType()), + InVec, InIdx)); } // Utility for visitShuffleVector - Return true if every element in Mask, @@ -2880,13 +2994,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> Mask; ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); unsigned MaskNumElts = Mask.size(); - - EVT VT = TLI.getValueType(I.getType()); + + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, &Mask[0])); return; } @@ -2901,7 +3016,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), VT, Src1, Src2)); return; } @@ -2909,7 +3024,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), VT, Src2, Src1)); return; } @@ -2927,10 +3042,10 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MOps2[0] = Src2; Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurDebugLoc(), VT, + getCurSDLoc(), VT, &MOps1[0], NumConcat); Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurDebugLoc(), VT, + getCurSDLoc(), VT, &MOps2[0], NumConcat); // Readjust mask for new input vector length. @@ -2942,7 +3057,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MappedOps.push_back(Idx); } - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, &MappedOps[0])); return; } @@ -3002,8 +3117,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); else - Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT, - Src, DAG.getIntPtrConstant(StartIdx[Input])); + Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, + Src, DAG.getConstant(StartIdx[Input], + TLI->getVectorIdxTy())); } // Calculate new mask. @@ -3019,7 +3135,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MappedOps.push_back(Idx); } - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, &MappedOps[0])); return; } @@ -3029,7 +3145,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. EVT EltVT = VT.getVectorElementType(); - EVT PtrVT = TLI.getPointerTy(); + EVT IdxVT = TLI->getVectorIdxTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3041,14 +3157,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src, DAG.getConstant(Idx, PtrVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), + EltVT, Src, DAG.getConstant(Idx, IdxVT)); } Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, &Ops[0], Ops.size())); } @@ -3062,10 +3178,11 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + const TargetLowering *TLI = TM.getTargetLowering(); SmallVector<EVT, 4> AggValueVTs; - ComputeValueVTs(TLI, AggTy, AggValueVTs); + ComputeValueVTs(*TLI, AggTy, AggValueVTs); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(*TLI, ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); @@ -3089,7 +3206,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&AggValueVTs[0], NumAggValues), &Values[0], NumAggValues)); } @@ -3102,8 +3219,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + const TargetLowering *TLI = TM.getTargetLowering(); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(*TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -3123,16 +3241,18 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : SDValue(Agg.getNode(), Agg.getResNo() + i); - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&ValValueVTs[0], NumValValues), &Values[0], NumValValues)); } void SelectionDAGBuilder::visitGetElementPtr(const User &I) { - SDValue N = getValue(I.getOperand(0)); + Value *Op0 = I.getOperand(0); // Note that the pointer operand may be a vector of pointers. Take the scalar // element which holds a pointer. - Type *Ty = I.getOperand(0)->getType()->getScalarType(); + Type *Ty = Op0->getType()->getScalarType(); + unsigned AS = Ty->getPointerAddressSpace(); + SDValue N = getValue(Op0); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3142,7 +3262,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Field) { // N = N + Offset uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); - N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, DAG.getConstant(Offset, N.getValueType())); } @@ -3151,50 +3271,50 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. + const TargetLowering *TLI = TM.getTargetLowering(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; - EVT PTy = TLI.getPointerTy(); + EVT PTy = TLI->getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) - OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), + OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, DAG.getConstant(Offs, MVT::i64)); else - OffsVal = DAG.getIntPtrConstant(Offs); + OffsVal = DAG.getConstant(Offs, PTy); - N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); continue; } // N = N + Idx * ElementSize; - APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(), + APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), TD->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend // it. - IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType()); + IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { unsigned Amt = ElementSize.logBase2(); - IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), + IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(), N.getValueType(), IdxN, DAG.getConstant(Amt, IdxN.getValueType())); } else { SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); - IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), + IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(), N.getValueType(), IdxN, Scale); } } - N = DAG.getNode(ISD::ADD, getCurDebugLoc(), + N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, IdxN); } } @@ -3209,18 +3329,19 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { return; // getValue will auto-populate this. Type *Ty = I.getAllocatedType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + const TargetLowering *TLI = TM.getTargetLowering(); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI->getPointerTy(); if (AllocSize.getValueType() != IntPtr) - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); - AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr, + AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr, AllocSize, DAG.getConstant(TySize, IntPtr)); @@ -3233,18 +3354,18 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. - AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), + AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(), AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(StackAlign-1)); // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), + AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(), AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); - SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(), + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops, 3); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); @@ -3272,7 +3393,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + ComputeValueVTs(*TM.getTargetLowering(), Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3306,15 +3427,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // (MaxParallelChains should always remain as failsafe). if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], ChainI); Root = Chain; ChainI = 0; } - SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(), + SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, + SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, isNonTemporal, isInvariant, Alignment, TBAAInfo, Ranges); @@ -3324,7 +3445,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } if (!ConstantMemory) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], ChainI); if (isVolatile) DAG.setRoot(Chain); @@ -3332,7 +3453,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { PendingLoads.push_back(Chain); } - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&ValueVTs[0], NumValues), &Values[0], NumValues)); } @@ -3346,7 +3467,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); + ComputeValueVTs(*TM.getTargetLowering(), SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3370,30 +3491,28 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. if (ChainI == MaxParallelChains) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], ChainI); Root = Chain; ChainI = 0; } - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, + SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); - SDValue St = DAG.getStore(Root, getCurDebugLoc(), + SDValue St = DAG.getStore(Root, getCurSDLoc(), SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), isVolatile, isNonTemporal, Alignment, TBAAInfo); Chains[ChainI] = St; } - SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], ChainI); - ++SDNodeOrder; - AssignOrderingToNode(StoreNode.getNode()); DAG.setRoot(StoreNode); } static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, SynchronizationScope Scope, - bool Before, DebugLoc dl, + bool Before, SDLoc dl, SelectionDAG &DAG, const TargetLowering &TLI) { // Fence, if necessary @@ -3416,39 +3535,40 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, } void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); - if (TLI.getInsertFencesForAtomic()) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - getValue(I.getCompareOperand()).getValueType().getSimpleVT(), + getValue(I.getCompareOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); ISD::NodeType NT; switch (I.getOperation()) { default: llvm_unreachable("Unknown atomicrmw operation"); @@ -3469,47 +3589,50 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDValue InChain = getRoot(); - if (TLI.getInsertFencesForAtomic()) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue L = DAG.getAtomic(NT, dl, - getValue(I.getValOperand()).getValueType().getSimpleVT(), + getValue(I.getValOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), 0 /* Alignment */, - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitFence(const FenceInst &I) { - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); + const TargetLowering *TLI = TM.getTargetLowering(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3)); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); - EVT VT = TLI.getValueType(I.getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3518,35 +3641,36 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), I.getPointerOperand(), I.getAlignment(), - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); - EVT VT = TLI.getValueType(I.getValueOperand()->getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, @@ -3554,12 +3678,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); DAG.setRoot(OutChain); } @@ -3584,12 +3708,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + const TargetLowering *TLI = TM.getTargetLowering(); + bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3598,7 +3723,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); + ComputeValueVTs(*TLI, I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3609,20 +3734,20 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), + Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, &Ops[0], Ops.size(), Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, Info.readMem, Info.writeMem); } else if (!HasChain) { - Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, &Ops[0], Ops.size()); } else if (!I.getType()->isVoidTy()) { - Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, &Ops[0], Ops.size()); } else { - Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(), + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, &Ops[0], Ops.size()); } @@ -3636,17 +3761,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { - EVT VT = TLI.getValueType(PTy); - Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result); + EVT VT = TLI->getValueType(PTy); + Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } setValue(&I, Result); - } else { - // Assign order to result here. If the intrinsic does not produce a result, - // it won't be mapped to a SDNode and visit() will not assign it an order - // number. - ++SDNodeOrder; - AssignOrderingToNode(Result.getNode()); } } @@ -3657,7 +3776,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, /// /// where Op is the hexadecimal representation of floating point value. static SDValue -GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { +GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x007fffff, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, @@ -3672,7 +3791,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { /// where Op is the hexadecimal representation of floating point value. static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, - DebugLoc dl) { + SDLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, MVT::i32)); SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, @@ -3691,7 +3810,7 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -3794,7 +3913,7 @@ static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG, /// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -3890,7 +4009,7 @@ static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG, /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -3985,7 +4104,7 @@ static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -4073,7 +4192,7 @@ static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG, /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -4168,10 +4287,10 @@ static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. -static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS, +static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const TargetLowering &TLI) { bool IsExp10 = false; - if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 && + if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) { APFloat Ten(10.0f); @@ -4276,7 +4395,7 @@ static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS, /// ExpandPowI - Expand a llvm.powi intrinsic. -static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, +static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { // If RHS is a constant, we can expand this out to a multiplication tree, // otherwise we end up lowering to a call to __powidf2 (for example). When @@ -4335,7 +4454,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { return 0; const SDValue &Ext = N.getOperand(0); - if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){ + if (Ext.getOpcode() == ISD::AssertZext || + Ext.getOpcode() == ISD::AssertSext) { const SDValue &CFR = Ext.getOperand(0); if (CFR.getOpcode() == ISD::CopyFromReg) return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); @@ -4358,20 +4478,19 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); // Ignore inlined function arguments here. DIVariable DV(Variable); if (DV.isInlinedFnArgument(MF.getFunction())) return false; - unsigned Reg = 0; + Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. - Offset = FuncInfo.getArgumentFrameIndex(Arg); - if (Offset) - Reg = TRI->getFrameRegister(MF); + if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) + Op = MachineOperand::CreateFI(FI); - if (!Reg && N.getNode()) { + if (!Op && N.getNode()) { + unsigned Reg; if (N.getOpcode() == ISD::CopyFromReg) Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); else @@ -4382,32 +4501,39 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (PR) Reg = PR; } + if (Reg) + Op = MachineOperand::CreateReg(Reg, false); } - if (!Reg) { + if (!Op) { // Check if ValueMap has reg number. DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) - Reg = VMI->second; + Op = MachineOperand::CreateReg(VMI->second, false); } - if (!Reg && N.getNode()) { + if (!Op && N.getNode()) // Check if frame index is available. if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) if (FrameIndexSDNode *FINode = - dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) { - Reg = TRI->getFrameRegister(MF); - Offset = FINode->getIndex(); - } - } + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) + Op = MachineOperand::CreateFI(FINode->getIndex()); - if (!Reg) + if (!Op) return false; - MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE)) - .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); - FuncInfo.ArgDbgValues.push_back(&*MIB); + // FIXME: This does not handle register-indirect values at offset 0. + bool IsIndirect = Offset != 0; + if (Op->isReg()) + FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(), + TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, + Op->getReg(), Offset, Variable)); + else + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op).addImm(Offset).addMetadata(Variable)); + return true; } @@ -4424,6 +4550,8 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, /// otherwise lower it and return null. const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { + const TargetLowering *TLI = TM.getTargetLowering(); + SDLoc sdl = getCurSDLoc(); DebugLoc dl = getCurDebugLoc(); SDValue Res; @@ -4436,17 +4564,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return 0; case Intrinsic::vacopy: visitVACopy(I); return 0; case Intrinsic::returnaddress: - setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; case Intrinsic::frameaddress: - setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: - return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; + return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; + return &"_longjmp"[!TLI->usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4462,7 +4590,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, + DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)))); return 0; @@ -4480,7 +4608,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, + DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)))); return 0; } @@ -4499,7 +4627,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, + DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)))); return 0; @@ -4508,17 +4636,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); MDNode *Variable = DI.getVariable(); const Value *Address = DI.getAddress(); - if (!Address || !DIVariable(Variable).Verify()) { + DIVariable DIVar(Variable); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!Address || !DIVar) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; } - // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder - // but do not always have a corresponding SDNode built. The SDNodeOrder - // absolute, but not relative, values are different depending on whether - // debug info exists. - ++SDNodeOrder; - // Check if address has undef value. if (isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { @@ -4589,7 +4714,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); - if (!DIVariable(DI.getVariable()).Verify()) + DIVariable DIVar(DI.getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgValueInst should be either null or a DIVariable."); + if (!DIVar) return 0; MDNode *Variable = DI.getVariable(); @@ -4598,11 +4726,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!V) return 0; - // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder - // but do not always have a corresponding SDNode built. The SDNodeOrder - // absolute, but not relative, values are different depending on whether - // debug info exists. - ++SDNodeOrder; SDDbgValue *SDV; if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); @@ -4666,7 +4789,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: DAG.getMachineFunction().getMMI().setCallsEHReturn(true); - DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, MVT::Other, getControlRoot(), getValue(I.getArgOperand(0)), @@ -4676,17 +4799,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); return 0; case Intrinsic::eh_dwarf_cfa: { - SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl, - TLI.getPointerTy()); - SDValue Offset = DAG.getNode(ISD::ADD, dl, - TLI.getPointerTy(), - DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, - TLI.getPointerTy()), + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, + TLI->getPointerTy()); + SDValue Offset = DAG.getNode(ISD::ADD, sdl, + CfaArg.getValueType(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, + CfaArg.getValueType()), CfaArg); - SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl, - TLI.getPointerTy(), - DAG.getConstant(0, TLI.getPointerTy())); - setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), + SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, + TLI->getPointerTy(), + DAG.getConstant(0, TLI->getPointerTy())); + setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return 0; } @@ -4712,7 +4835,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); - SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, + SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, DAG.getVTList(MVT::i32, MVT::Other), Ops, 2); setValue(&I, Op.getValue(0)); @@ -4720,7 +4843,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::eh_sjlj_longjmp: { - DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); return 0; } @@ -4775,10 +4898,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); - EVT DestVT = TLI.getValueType(I.getType()); - ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt); - Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, &ShOps[0], 2); + EVT DestVT = TLI->getValueType(I.getType()); + ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); + Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); @@ -4788,14 +4911,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vinsertf128_ps_256: case Intrinsic::x86_avx_vinsertf128_si_256: case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI.getValueType(I.getType()); - EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + EVT DestVT = TLI->getValueType(I.getType()); + EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * ElVT.getVectorNumElements(); - Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); return 0; } @@ -4803,12 +4926,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vextractf128_ps_256: case Intrinsic::x86_avx_vextractf128_si_256: case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI->getValueType(I.getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * DestVT.getVectorNumElements(); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, getValue(I.getArgOperand(0)), - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); return 0; } @@ -4834,9 +4957,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI->getValueType(I.getType()); const Value *Op1 = I.getArgOperand(0); - Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1), + Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), getValue(I.getArgOperand(1)), @@ -4846,27 +4969,27 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::powi: - setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)), + setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); return 0; case Intrinsic::log: - setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::log2: - setValue(&I, expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::log10: - setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::exp: - setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::exp2: - setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::pow: - setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, TLI)); + setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG, *TLI)); return 0; case Intrinsic::sqrt: case Intrinsic::fabs: @@ -4876,7 +4999,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::ceil: case Intrinsic::trunc: case Intrinsic::rint: - case Intrinsic::nearbyint: { + case Intrinsic::nearbyint: + case Intrinsic::round: { unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. @@ -4889,35 +5013,42 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; } - setValue(&I, DAG.getNode(Opcode, dl, + setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return 0; } + case Intrinsic::copysign: + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return 0; case Intrinsic::fma: - setValue(&I, DAG.getNode(ISD::FMA, dl, + setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return 0; case Intrinsic::fmuladd: { - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI->getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isFMAFasterThanMulAndAdd(VT)){ - setValue(&I, DAG.getNode(ISD::FMA, dl, + TLI->isFMAFasterThanFMulAndFAdd(VT)) { + setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); } else { - SDValue Mul = DAG.getNode(ISD::FMUL, dl, + SDValue Mul = DAG.getNode(ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1))); - SDValue Add = DAG.getNode(ISD::FADD, dl, + SDValue Add = DAG.getNode(ISD::FADD, sdl, getValue(I.getArgOperand(0)).getValueType(), Mul, getValue(I.getArgOperand(2))); @@ -4926,21 +5057,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::convert_to_fp16: - setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, + setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl, MVT::i16, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::convert_from_fp16: - setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl, + setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl, MVT::f32, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); - DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); return 0; } case Intrinsic::readcyclecounter: { SDValue Op = getRoot(); - Res = DAG.getNode(ISD::READCYCLECOUNTER, dl, + Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, DAG.getVTList(MVT::i64, MVT::Other), &Op, 1); setValue(&I, Res); @@ -4948,7 +5079,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::bswap: - setValue(&I, DAG.getNode(ISD::BSWAP, dl, + setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return 0; @@ -4957,7 +5088,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, - dl, Ty, Arg)); + sdl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { @@ -4965,33 +5096,33 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, - dl, Ty, Arg)); + sdl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); + setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); return 0; } case Intrinsic::stacksave: { SDValue Op = getRoot(); - Res = DAG.getNode(ISD::STACKSAVE, dl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); + Res = DAG.getNode(ISD::STACKSAVE, sdl, + DAG.getVTList(TLI->getPointerTy(), MVT::Other), &Op, 1); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return 0; } case Intrinsic::stackrestore: { Res = getValue(I.getArgOperand(0)); - DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return 0; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI->getPointerTy(); SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); @@ -5002,7 +5133,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(getRoot(), dl, Src, FIN, + Res = DAG.getStore(getRoot(), sdl, Src, FIN, MachinePointerInfo::getFixedStack(FI), true, false, 0); setValue(&I, Res); @@ -5046,14 +5177,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); - Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6); + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops, 6); DAG.setRoot(Res); return 0; } case Intrinsic::adjust_trampoline: { - setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl, - TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, + TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; } @@ -5070,7 +5201,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: - setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); return 0; case Intrinsic::expect: { @@ -5083,9 +5214,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trap: { StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { - ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? + ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? ISD::TRAP : ISD::DEBUGTRAP; - DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot())); + DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); return 0; } TargetLowering::ArgListTy Args; @@ -5094,9 +5225,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), - Args, DAG, dl); - std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); + DAG.getExternalSymbol(TrapFuncName.data(), + TLI->getPointerTy()), + Args, DAG, sdl); + std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); DAG.setRoot(Result.second); return 0; } @@ -5121,7 +5253,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2)); + setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); return 0; } case Intrinsic::prefetch: { @@ -5132,7 +5264,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); - DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, + DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), &Ops[0], 5, EVT::getIntegerVT(*Context, 8), @@ -5153,8 +5285,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SmallVector<Value *, 4> Allocas; GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); - for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(), - E = Allocas.end(); Object != E; ++Object) { + for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), + E = Allocas.end(); Object != E; ++Object) { AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); // Could not find an Alloca. @@ -5165,24 +5297,45 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); - Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); + Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops, 2); DAG.setRoot(Res); } return 0; } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); return 0; case Intrinsic::invariant_end: // Discard region information. return 0; + case Intrinsic::stackprotectorcheck: { + // Do not actually emit anything for this basic block. Instead we initialize + // the stack protector descriptor and export the guard variable so we can + // access it in FinishBasicBlock. + const BasicBlock *BB = I.getParent(); + SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); + ExportFromCurrentBlock(SPDescriptor.getGuard()); + + // Flush our exports since we are going to process a terminator. + (void)getControlRoot(); + return 0; + } case Intrinsic::donothing: // ignore return 0; + case Intrinsic::experimental_stackmap: { + visitStackmap(I); + return 0; + } + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: { + visitPatchpoint(I); + return 0; + } } } @@ -5201,26 +5354,27 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI); + const TargetLowering *TLI = TM.getTargetLowering(); + GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); - bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); + bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; if (!CanLowerReturn) { - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize( + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( FTy->getReturnType()); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment( + unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; @@ -5246,15 +5400,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); - unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); - Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned); - Entry.Alignment = CS.getParamAlignment(attrInd); + // Skip the first return-type Attribute to get to params. + Entry.setAttributes(&CS, i - CS.arg_begin() + 1); Args.push_back(Entry); } @@ -5277,18 +5424,18 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Both PendingLoads and PendingExports must be flushed here; // this call might not return. (void)getRoot(); - DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel)); + DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); } // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within TLI.LowerCallTo. - if (isTailCall && !isInTailCallPosition(CS, TLI)) + // Target-dependent constraints are checked within TLI->LowerCallTo. + if (isTailCall && !isInTailCallPosition(CS, *TLI)) isTailCall = false; TargetLowering:: CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, - getCurDebugLoc(), CS); - std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI); + getCurSDLoc(), CS); + std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && @@ -5301,59 +5448,57 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SmallVector<EVT, 1> PVTs; Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - ComputeValueVTs(TLI, PtrRetTy, PVTs); + ComputeValueVTs(*TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; SmallVector<EVT, 4> RetTys; SmallVector<uint64_t, 4> Offsets; RetTy = FTy->getReturnType(); - ComputeValueVTs(TLI, RetTy, RetTys, &Offsets); + ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); unsigned NumValues = RetTys.size(); SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(NumValues); for (unsigned i = 0; i < NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, + SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add, + SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], NumValues); PendingLoads.push_back(Chain); setValue(CS.getInstruction(), - DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&RetTys[0], RetTys.size()), &Values[0], Values.size())); } - // Assign order to nodes here. If the call does not produce a result, it won't - // be mapped to a SDNode and visit() will not assign it an order number. if (!Result.second.getNode()) { - // As a special case, a null chain means that a tail call has been emitted and - // the DAG root is already updated. + // As a special case, a null chain means that a tail call has been emitted + // and the DAG root is already updated. HasTailCall = true; - ++SDNodeOrder; - AssignOrderingToNode(DAG.getRoot().getNode()); + + // Since there's no actual continuation from this block, nothing can be + // relying on us setting vregs for them. + PendingExports.clear(); } else { DAG.setRoot(Result.second); - ++SDNodeOrder; - AssignOrderingToNode(Result.second.getNode()); } if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); - DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel)); + DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. MMI.addInvoke(LandingPad, BeginLabel, EndLabel); @@ -5408,10 +5553,10 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, } SDValue Ptr = Builder.getValue(PtrVal); - SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, + SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, - false /*nontemporal*/, + false /*nontemporal*/, false /*isinvariant*/, 1 /* align=1 */); if (!ConstantMemory) @@ -5419,6 +5564,18 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, return LoadVal; } +/// processIntegerCallValue - Record the value for an instruction that +/// produces an integer result, converting the type where necessary. +void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, + SDValue Value, + bool IsSigned) { + EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true); + if (IsSigned) + Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); + else + Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); + setValue(&I, Value); +} /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be @@ -5434,15 +5591,33 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { !I.getType()->isIntegerTy()) return false; - const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2)); + const Value *Size = I.getArgOperand(2); + const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); + if (CSize && CSize->getZExtValue() == 0) { + EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true); + setValue(&I, DAG.getConstant(0, CallVT)); + return true; + } + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(LHS), getValue(RHS), getValue(Size), + MachinePointerInfo(LHS), + MachinePointerInfo(RHS)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { + if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { bool ActuallyDoIt = true; MVT LoadVT; Type *LoadTy; - switch (Size->getZExtValue()) { + switch (CSize->getZExtValue()) { default: LoadVT = MVT::Other; LoadTy = 0; @@ -5450,20 +5625,20 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { break; case 2: LoadVT = MVT::i16; - LoadTy = Type::getInt16Ty(Size->getContext()); + LoadTy = Type::getInt16Ty(CSize->getContext()); break; case 4: LoadVT = MVT::i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); break; case 8: LoadVT = MVT::i64; - LoadTy = Type::getInt64Ty(Size->getContext()); + LoadTy = Type::getInt64Ty(CSize->getContext()); break; /* case 16: LoadVT = MVT::v4i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); LoadTy = VectorType::get(LoadTy, 4); break; */ @@ -5476,10 +5651,11 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Require that we can find a legal MVT, and only do this if the target // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. - if (ActuallyDoIt && Size->getZExtValue() > 4) { + const TargetLowering *TLI = TM.getTargetLowering(); + if (ActuallyDoIt && CSize->getZExtValue() > 4) { // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT)) + if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) ActuallyDoIt = false; } @@ -5487,10 +5663,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); - SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal, + SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); - EVT CallVT = TLI.getValueType(I.getType(), true); - setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT)); + processIntegerCallValue(I, Res, false); return true; } } @@ -5499,6 +5674,148 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitMemChrCall -- See if we can lower a memchr call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { + // Verify that the prototype makes sense. void *memchr(void *, int, size_t) + if (I.getNumArgOperands() != 3) + return false; + + const Value *Src = I.getArgOperand(0); + const Value *Char = I.getArgOperand(1); + const Value *Length = I.getArgOperand(2); + if (!Src->getType()->isPointerTy() || + !Char->getType()->isIntegerTy() || + !Length->getType()->isIntegerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Src), getValue(Char), getValue(Length), + MachinePointerInfo(Src)); + if (Res.first.getNode()) { + setValue(&I, Res.first); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an +/// optimized form. If so, return true and lower it, otherwise return false +/// and it will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { + // Verify that the prototype makes sense. char *strcpy(char *, char *) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1), isStpcpy); + if (Res.first.getNode()) { + setValue(&I, Res.first); + DAG.setRoot(Res.second); + return true; + } + + return false; +} + +/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. +/// If so, return true and lower it, otherwise return false and it will be +/// lowered like a normal call. +bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { + // Verify that the prototype makes sense. int strcmp(void*,void*) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrLenCall -- See if we can lower a strlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { + // Verify that the prototype makes sense. size_t strlen(char *) + if (I.getNumArgOperands() != 1) + return false; + + const Value *Arg0 = I.getArgOperand(0); + if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { + // Verify that the prototype makes sense. size_t strnlen(char *, size_t) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + /// visitUnaryFloatCall - If a call instruction is a unary floating-point /// operation (as expected), translate it to an SDNode with the specified opcode /// and return true. @@ -5512,7 +5829,7 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, return false; SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp)); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); return true; } @@ -5561,7 +5878,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); - setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), LHS.getValueType(), LHS, RHS)); return; } @@ -5587,6 +5904,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl: + case LibFunc::sqrt_finite: + case LibFunc::sqrtf_finite: + case LibFunc::sqrtl_finite: if (visitUnaryFloatCall(I, ISD::FSQRT)) return; break; @@ -5614,6 +5934,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + if (visitUnaryFloatCall(I, ISD::FROUND)) + return; + break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: @@ -5636,6 +5962,30 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitMemCmpCall(I)) return; break; + case LibFunc::memchr: + if (visitMemChrCall(I)) + return; + break; + case LibFunc::strcpy: + if (visitStrCpyCall(I, false)) + return; + break; + case LibFunc::stpcpy: + if (visitStrCpyCall(I, true)) + return; + break; + case LibFunc::strcmp: + if (visitStrCmpCall(I)) + return; + break; + case LibFunc::strlen: + if (visitStrLenCall(I)) + return; + break; + case LibFunc::strnlen: + if (visitStrNLenCall(I)) + return; + break; } } } @@ -5644,7 +5994,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!RenameFn) Callee = getValue(I.getCalledValue()); else - Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); + Callee = DAG.getExternalSymbol(RenameFn, + TM.getTargetLowering()->getPointerTy()); // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. @@ -5733,7 +6084,7 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; /// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, - DebugLoc DL, + SDLoc DL, SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); @@ -5839,8 +6190,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; + const TargetLowering *TLI = TM.getTargetLowering(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI.ParseConstraints(CS); + TargetConstraints = TLI->ParseConstraints(CS); bool hasMemory = false; @@ -5865,10 +6217,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); + OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI.getSimpleValueType(CS.getType()); + OpVT = TLI->getSimpleValueType(CS.getType()); } ++ResNo; break; @@ -5889,7 +6241,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD). + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD). getSimpleVT(); } @@ -5901,7 +6253,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { TargetLowering::ConstraintType - CType = TLI.getConstraintType(OpInfo.Codes[j]); + CType = TLI->getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -5933,11 +6285,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintVT != Input.ConstraintVT) { std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); + TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + TLI->getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -5950,7 +6302,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.Type == InlineAsm::isClobber) @@ -5978,17 +6330,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), - TLI.getPointerTy()); + TLI->getPointerTy()); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); - Chain = DAG.getStore(Chain, getCurDebugLoc(), + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy()); + Chain = DAG.getStore(Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(SSFI), false, false, 0); @@ -6005,7 +6357,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); + GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -6016,7 +6368,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); + GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6024,7 +6376,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back( DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), - TLI.getPointerTy())); + TLI->getPointerTy())); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we @@ -6047,7 +6399,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); + TLI->ComputeConstraintToUse(OpInfo, SDValue()); // Ideally, we would only check against memory constraints. However, the // meaning of an other constraint can be target-specific and we can't easily @@ -6065,7 +6417,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, - TLI.getPointerTy())); + TLI->getPointerTy())); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6087,7 +6439,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6098,10 +6450,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), + Ctx.emitError(CS.getInstruction(), "couldn't allocate output register for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } // If this is an indirect operand, store through the pointer after the @@ -6118,13 +6470,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know that this register is // set. - OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? - InlineAsm::Kind_RegDefEarlyClobber : - InlineAsm::Kind_RegDef, - false, - 0, - DAG, - AsmNodeOperands); + OpInfo.AssignedRegs + .AddInlineAsmOperands(OpInfo.isEarlyClobber + ? InlineAsm::Kind_RegDefEarlyClobber + : InlineAsm::Kind_RegDef, + false, 0, DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { @@ -6156,10 +6506,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" - " don't know how to handle tied " - "indirect register inputs"); - report_fatal_error("Cannot handle indirect register inputs!"); + Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); + return; } RegsForValue MatchedRegs; @@ -6169,18 +6519,18 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); i != e; ++i) { - if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT)) MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); else { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm error: This value" + Ctx.emitError(CS.getInstruction(), + "inline asm error: This value" " type register class is not natively supported!"); - report_fatal_error("inline asm error: This value type register " - "class is not natively supported!"); + return; } } // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), @@ -6196,7 +6546,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -6208,34 +6558,34 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; - TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, - Ops, DAG); + TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, + Ops, DAG); if (Ops.empty()) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), "invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - assert(InOperandVal.getValueType() == TLI.getPointerTy() && + assert(InOperandVal.getValueType() == TLI->getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6249,20 +6599,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), "Don't know how to handle indirect register inputs yet " - "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); - break; + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), + Ctx.emitError(CS.getInstruction(), "couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } - OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, @@ -6285,7 +6636,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); - Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), + Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), DAG.getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], AsmNodeOperands.size()); Flag = Chain.getValue(1); @@ -6293,12 +6644,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this asm returns a register value, copy the result from that register // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { - SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), + SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI.getValueType(CS.getType()); + EVT ResultType = TLI->getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -6306,7 +6657,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // not have the same VT as was expected. Convert it to the right type // with bit_convert. if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { - Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultType, Val); } else if (ResultType != Val.getValueType() && @@ -6314,7 +6665,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If a result value was tied to an input value, the computed result may // have a wider width than the expected result. Extract the relevant // portion. - Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val); + Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val); } assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); @@ -6333,7 +6684,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; - SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, IA); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -6341,7 +6692,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Emit the non-flagged stores from the physregs. SmallVector<SDValue, 8> OutChains; for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { - SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), + SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), MachinePointerInfo(StoresToEmit[i].second), @@ -6350,22 +6701,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } if (!OutChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &OutChains[0], OutChains.size()); DAG.setRoot(Chain); } void SelectionDAGBuilder::visitVAStart(const CallInst &I) { - DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { - const DataLayout &TD = *TLI.getDataLayout(); - SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), + const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout &TD = *TLI->getDataLayout(); + SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), TD.getABITypeAlignment(I.getType())); @@ -6374,14 +6726,14 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { } void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { - DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVACopy(const CallInst &I) { - DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), @@ -6389,6 +6741,248 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.getSrcValue(I.getArgOperand(1)))); } +/// \brief Lower an argument list according to the target calling convention. +/// +/// \return A tuple of <return-value, token-chain> +/// +/// This is a helper for lowering intrinsics that follow a target calling +/// convention or require stack pointer adjustment. Only a subset of the +/// intrinsic's operands need to participate in the calling convention. +std::pair<SDValue, SDValue> +SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, + unsigned NumArgs, SDValue Callee, + bool useVoidTy) { + TargetLowering::ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. + ImmutableCallSite CS(&CI); + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + ArgI != ArgE; ++ArgI) { + const Value *V = CI.getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + TargetLowering::ArgListEntry Entry; + Entry.Node = getValue(V); + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, AttrI); + Args.push_back(Entry); + } + + Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); + TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false, + /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs, + CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false, + /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc()); + + const TargetLowering *TLI = TM.getTargetLowering(); + return TLI->LowerCallTo(CLI); +} + +/// \brief Lower llvm.experimental.stackmap directly to its target opcode. +void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { + // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, + // [live variables...]) + + assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); + + SDValue Callee = getValue(CI.getCalledValue()); + + // Lower into a call sequence with no args and no return value. + std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee); + // Set the root to the target-lowered call chain. + SDValue Chain = Result.second; + DAG.setRoot(Chain); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + SDNode *CallEnd = Chain.getNode(); + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool hasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the stackmap intrinsic. + SmallVector<SDValue, 8> Ops; + + // Add the <id> and <numShadowBytes> constants. + for (unsigned i = 0; i < 2; ++i) { + SDValue tmp = getValue(CI.getOperand(i)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); + } + // Push live variables for the stack map. + for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i) + Ops.push_back(getValue(CI.getArgOperand(i))); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). + if (hasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a STACKMAP node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(), + NodeTys, Ops); + + // StackMap generates no value, so nothing goes in the NodeMap. + + // Fixup the consumers of the intrinsic. The chain and glue may be used in the + // call sequence. + DAG.ReplaceAllUsesWith(Call, MN); + + DAG.DeleteNode(Call); +} + +/// \brief Lower llvm.experimental.patchpoint directly to its target opcode. +void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { + // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>, + // i32 <numBytes>, + // i8* <target>, + // i32 <numArgs>, + // [Args...], + // [live variables...]) + + CallingConv::ID CC = CI.getCallingConv(); + bool isAnyRegCC = CC == CallingConv::AnyReg; + bool hasDef = !CI.getType()->isVoidTy(); + SDValue Callee = getValue(CI.getOperand(2)); // <target> + + // Get the real number of arguments participating in the call <numArgs> + unsigned NumArgs = + cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue(); + + // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> + assert(CI.getNumArgOperands() >= NumArgs + 4 && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; + std::pair<SDValue, SDValue> Result = + LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + + // Set the root to the target-lowered call chain. + SDValue Chain = Result.second; + DAG.setRoot(Chain); + + SDNode *CallEnd = Chain.getNode(); + if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) + CallEnd = CallEnd->getOperand(0).getNode(); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool hasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the patchable intrinsic. + SmallVector<SDValue, 8> Ops; + + // Add the <id> and <numNopBytes> constants. + for (unsigned i = 0; i < 2; ++i) { + SDValue tmp = getValue(CI.getOperand(i)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); + } + // Assume that the Callee is a constant address. + Ops.push_back( + DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), + /*isTarget=*/true)); + + // Adjust <numArgs> to account for any arguments that have been passed on the + // stack instead. + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3); + NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs; + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + + // Add the calling convention + Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (isAnyRegCC) + for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + Ops.push_back(getValue(CI.getArgOperand(i))); + + // Push the arguments from the call instruction. + SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; + for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) + Ops.push_back(*i); + + // Push live variables for the stack map. + for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) { + SDValue OpVal = getValue(CI.getArgOperand(i)); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { + Ops.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else + Ops.push_back(OpVal); + } + + // Push the register mask info. + if (hasGlue) + Ops.push_back(*(Call->op_end()-2)); + else + Ops.push_back(*(Call->op_end()-1)); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). + if (hasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys; + if (isAnyRegCC && hasDef) { + // Create the return types based on the intrinsic definition + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 3> ValueVTs; + ComputeValueVTs(TLI, CI.getType(), ValueVTs); + assert(ValueVTs.size() == 1 && "Expected only one return value type."); + + // There is always a chain and a glue type at the end + ValueVTs.push_back(MVT::Other); + ValueVTs.push_back(MVT::Glue); + NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + } else + NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a PATCHPOINT node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, + getCurSDLoc(), NodeTys, Ops); + + // Update the NodeMap. + if (hasDef) { + if (isAnyRegCC) + setValue(&CI, SDValue(MN, 0)); + else + setValue(&CI, Result.first); + } + + // Fixup the consumers of the intrinsic. The chain and glue may be used in the + // call sequence. Furthermore the location of the chain and glue can change + // when the AnyReg calling convention is used and the intrinsic returns a + // value. + if (isAnyRegCC && hasDef) { + SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; + SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + } else + DAG.ReplaceAllUsesWith(Call, MN); + DAG.DeleteNode(Call); +} + /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. /// FIXME: When all targets are @@ -6406,6 +7000,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; MyFlags.Used = CLI.IsReturnValueUsed; if (CLI.RetSExt) MyFlags.Flags.setSExt(); @@ -6495,7 +7090,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, i < CLI.NumFixedArgs, i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) @@ -6588,9 +7183,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); - RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V); + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, 0, V); PendingExports.push_back(Chain); } @@ -6617,21 +7213,23 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; - DebugLoc dl = SDB->getCurDebugLoc(); - const DataLayout *TD = TLI.getDataLayout(); + SDLoc dl = SDB->getCurSDLoc(); + const TargetLowering *TLI = getTargetLowering(); + const DataLayout *TD = TLI->getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*getTargetLowering(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. ISD::ArgFlagsTy Flags; Flags.setSRet(); - MVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); - ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0); + MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); + ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0); Ins.push_back(RetArg); } @@ -6640,8 +7238,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); + unsigned PartBase = 0; for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; @@ -6669,18 +7268,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); else - FrameAlign = TLI.getByValTypeAlignment(ElementTy); + FrameAlign = TLI->getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - MVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); - unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); + MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); + unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed, - Idx-1, i*RegisterVT.getStoreSize()); + ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, + Idx-1, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -6688,14 +7287,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setOrigAlign(1); Ins.push_back(MyFlags); } + PartBase += VT.getStoreSize(); } } // Call the target to set up the argument values. SmallVector<SDValue, 8> InVals; - SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), - F.isVarArg(), Ins, - dl, DAG, InVals); + SDValue NewRoot = TLI->LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), + F.isVarArg(), Ins, + dl, DAG, InVals); // Verify that the target's LowerFormalArguments behaved as expected. assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && @@ -6721,18 +7321,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); - MVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, NULL, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); - unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); + unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; - NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), + NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); @@ -6745,18 +7345,24 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ++I, ++Idx) { SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); // If this argument is unused then remember its value. It is used to generate // debugging information. - if (I->use_empty() && NumValues) + if (I->use_empty() && NumValues) { SDB->setUnusedArgValue(I, InVals[i]); + // Also remember any frame index for use in FastISel. + if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(InVals[i].getNode())) + FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + } + for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - MVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); - unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); + MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); + unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); if (!I->use_empty()) { ISD::NodeType AssertOp = ISD::DELETED_NODE; @@ -6783,11 +7389,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, - SDB->getCurDebugLoc()); + SDB->getCurSDLoc()); SDB->setValue(I, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { - if (LoadSDNode *LNode = + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) @@ -6885,15 +7491,36 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, PN->getType(), ValueVTs); + const TargetLowering *TLI = TM.getTargetLowering(); + ComputeValueVTs(*TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); + unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; } } } + ConstantsOut.clear(); } + +/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB +/// is 0. +MachineBasicBlock * +SelectionDAGBuilder::StackProtectorDescriptor:: +AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + MachineBasicBlock *SuccMBB) { + // If SuccBB has not been created yet, create it. + if (!SuccMBB) { + MachineFunction *MF = ParentMBB->getParent(); + MachineFunction::iterator BBI = ParentMBB; + SuccMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(++BBI, SuccMBB); + } + // Add it as a successor of ParentMBB. + ParentMBB->addSuccessor(SuccMBB); + return SuccMBB; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9188945..835f643 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===// +//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -26,6 +26,7 @@ namespace llvm { +class AddrSpaceCastInst; class AliasAnalysis; class AllocaInst; class BasicBlock; @@ -80,11 +81,11 @@ class ZExtInst; /// implementation that is parameterized by a TargetLowering object. /// class SelectionDAGBuilder { - /// CurDebugLoc - current file + line number. Changes as we build the DAG. - DebugLoc CurDebugLoc; + /// CurInst - The current instruction being visited + const Instruction *CurInst; DenseMap<const Value*, SDValue> NodeMap; - + /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used /// to preserve debug information for incoming arguments. DenseMap<const Value*, SDValue> UnusedArgNodeMap; @@ -182,6 +183,17 @@ private: typedef std::vector<CaseRec> CaseRecVector; + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator()(const Case &C1, const Case &C2) { + assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; + struct CaseBitsCmp { bool operator()(const CaseBits &C1, const CaseBits &C2) { return C1.Bits > C2.Bits; @@ -224,7 +236,7 @@ private: struct JumpTable { JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} - + /// Reg - the virtual register containing the index of the jump table entry //. to jump to. unsigned Reg; @@ -278,12 +290,204 @@ private: BitTestInfo Cases; }; -public: - // TLI - This is information that describes the available target features we - // need for lowering. This indicates when operations are unavailable, - // implemented with a libcall, etc. + /// A class which encapsulates all of the information needed to generate a + /// stack protector check and signals to isel via its state being initialized + /// that a stack protector needs to be generated. + /// + /// *NOTE* The following is a high level documentation of SelectionDAG Stack + /// Protector Generation. The reason that it is placed here is for a lack of + /// other good places to stick it. + /// + /// High Level Overview of SelectionDAG Stack Protector Generation: + /// + /// Previously, generation of stack protectors was done exclusively in the + /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated + /// splitting basic blocks at the IR level to create the success/failure basic + /// blocks in the tail of the basic block in question. As a result of this, + /// calls that would have qualified for the sibling call optimization were no + /// longer eligible for optimization since said calls were no longer right in + /// the "tail position" (i.e. the immediate predecessor of a ReturnInst + /// instruction). + /// + /// Then it was noticed that since the sibling call optimization causes the + /// callee to reuse the caller's stack, if we could delay the generation of + /// the stack protector check until later in CodeGen after the sibling call + /// decision was made, we get both the tail call optimization and the stack + /// protector check! + /// + /// A few goals in solving this problem were: + /// + /// 1. Preserve the architecture independence of stack protector generation. + /// + /// 2. Preserve the normal IR level stack protector check for platforms like + /// OpenBSD for which we support platform specific stack protector + /// generation. + /// + /// The main problem that guided the present solution is that one can not + /// solve this problem in an architecture independent manner at the IR level + /// only. This is because: + /// + /// 1. The decision on whether or not to perform a sibling call on certain + /// platforms (for instance i386) requires lower level information + /// related to available registers that can not be known at the IR level. + /// + /// 2. Even if the previous point were not true, the decision on whether to + /// perform a tail call is done in LowerCallTo in SelectionDAG which + /// occurs after the Stack Protector Pass. As a result, one would need to + /// put the relevant callinst into the stack protector check success + /// basic block (where the return inst is placed) and then move it back + /// later at SelectionDAG/MI time before the stack protector check if the + /// tail call optimization failed. The MI level option was nixed + /// immediately since it would require platform specific pattern + /// matching. The SelectionDAG level option was nixed because + /// SelectionDAG only processes one IR level basic block at a time + /// implying one could not create a DAG Combine to move the callinst. + /// + /// To get around this problem a few things were realized: + /// + /// 1. While one can not handle multiple IR level basic blocks at the + /// SelectionDAG Level, one can generate multiple machine basic blocks + /// for one IR level basic block. This is how we handle bit tests and + /// switches. + /// + /// 2. At the MI level, tail calls are represented via a special return + /// MIInst called "tcreturn". Thus if we know the basic block in which we + /// wish to insert the stack protector check, we get the correct behavior + /// by always inserting the stack protector check right before the return + /// statement. This is a "magical transformation" since no matter where + /// the stack protector check intrinsic is, we always insert the stack + /// protector check code at the end of the BB. + /// + /// Given the aforementioned constraints, the following solution was devised: + /// + /// 1. On platforms that do not support SelectionDAG stack protector check + /// generation, allow for the normal IR level stack protector check + /// generation to continue. + /// + /// 2. On platforms that do support SelectionDAG stack protector check + /// generation: + /// + /// a. Use the IR level stack protector pass to decide if a stack + /// protector is required/which BB we insert the stack protector check + /// in by reusing the logic already therein. If we wish to generate a + /// stack protector check in a basic block, we place a special IR + /// intrinsic called llvm.stackprotectorcheck right before the BB's + /// returninst or if there is a callinst that could potentially be + /// sibling call optimized, before the call inst. + /// + /// b. Then when a BB with said intrinsic is processed, we codegen the BB + /// normally via SelectBasicBlock. In said process, when we visit the + /// stack protector check, we do not actually emit anything into the + /// BB. Instead, we just initialize the stack protector descriptor + /// class (which involves stashing information/creating the success + /// mbbb and the failure mbb if we have not created one for this + /// function yet) and export the guard variable that we are going to + /// compare. + /// + /// c. After we finish selecting the basic block, in FinishBasicBlock if + /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is + /// initialized, we first find a splice point in the parent basic block + /// before the terminator and then splice the terminator of said basic + /// block into the success basic block. Then we code-gen a new tail for + /// the parent basic block consisting of the two loads, the comparison, + /// and finally two branches to the success/failure basic blocks. We + /// conclude by code-gening the failure basic block if we have not + /// code-gened it already (all stack protector checks we generate in + /// the same function, use the same failure basic block). + class StackProtectorDescriptor { + public: + StackProtectorDescriptor() : ParentMBB(0), SuccessMBB(0), FailureMBB(0), + Guard(0) { } + ~StackProtectorDescriptor() { } + + /// Returns true if all fields of the stack protector descriptor are + /// initialized implying that we should/are ready to emit a stack protector. + bool shouldEmitStackProtector() const { + return ParentMBB && SuccessMBB && FailureMBB && Guard; + } + + /// Initialize the stack protector descriptor structure for a new basic + /// block. + void initialize(const BasicBlock *BB, + MachineBasicBlock *MBB, + const CallInst &StackProtCheckCall) { + // Make sure we are not initialized yet. + assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " + "already initialized!"); + ParentMBB = MBB; + SuccessMBB = AddSuccessorMBB(BB, MBB); + FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB); + if (!Guard) + Guard = StackProtCheckCall.getArgOperand(0); + } + + /// Reset state that changes when we handle different basic blocks. + /// + /// This currently includes: + /// + /// 1. The specific basic block we are generating a + /// stack protector for (ParentMBB). + /// + /// 2. The successor machine basic block that will contain the tail of + /// parent mbb after we create the stack protector check (SuccessMBB). This + /// BB is visited only on stack protector check success. + void resetPerBBState() { + ParentMBB = 0; + SuccessMBB = 0; + } + + /// Reset state that only changes when we switch functions. + /// + /// This currently includes: + /// + /// 1. FailureMBB since we reuse the failure code path for all stack + /// protector checks created in an individual function. + /// + /// 2.The guard variable since the guard variable we are checking against is + /// always the same. + void resetPerFunctionState() { + FailureMBB = 0; + Guard = 0; + } + + MachineBasicBlock *getParentMBB() { return ParentMBB; } + MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } + MachineBasicBlock *getFailureMBB() { return FailureMBB; } + const Value *getGuard() { return Guard; } + + private: + /// The basic block for which we are generating the stack protector. + /// + /// As a result of stack protector generation, we will splice the + /// terminators of this basic block into the successor mbb SuccessMBB and + /// replace it with a compare/branch to the successor mbbs + /// SuccessMBB/FailureMBB depending on whether or not the stack protector + /// was violated. + MachineBasicBlock *ParentMBB; + + /// A basic block visited on stack protector check success that contains the + /// terminators of ParentMBB. + MachineBasicBlock *SuccessMBB; + + /// This basic block visited on stack protector check failure that will + /// contain a call to __stack_chk_fail(). + MachineBasicBlock *FailureMBB; + + /// The guard variable which we will compare against the stored value in the + /// stack protector stack slot. + const Value *Guard; + + /// Add a successor machine basic block to ParentMBB. If the successor mbb + /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic + /// block will be created. + MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + MachineBasicBlock *SuccMBB = 0); + }; + +private: const TargetMachine &TM; - const TargetLowering &TLI; +public: SelectionDAG &DAG; const DataLayout *TD; AliasAnalysis *AA; @@ -298,6 +502,9 @@ public: /// BitTestCases - Vector of BitTestBlock structures used to communicate /// SwitchInst code generation information. std::vector<BitTestBlock> BitTestCases; + /// A StackProtectorDescriptor structure used to communicate stack protector + /// information in between SelectBasicBlock and FinishBasicBlock. + StackProtectorDescriptor SPDescriptor; // Emit PHI-node-operand constants only once even if used by multiple // PHI nodes. @@ -308,9 +515,9 @@ public: FunctionLoweringInfo &FuncInfo; /// OptLevel - What optimization level we're generating code for. - /// + /// CodeGenOpt::Level OptLevel; - + /// GFI - Garbage collection metadata for the function. GCFunctionInfo *GFI; @@ -327,7 +534,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + : CurInst(NULL), SDNodeOrder(0), TM(dag.getTarget()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false) { } @@ -364,17 +571,18 @@ public: /// SDValue getControlRoot(); - DebugLoc getCurDebugLoc() const { return CurDebugLoc; } + SDLoc getCurSDLoc() const { + return SDLoc(CurInst, SDNodeOrder); + } + + DebugLoc getCurDebugLoc() const { + return CurInst ? CurInst->getDebugLoc() : DebugLoc(); + } unsigned getSDNodeOrder() const { return SDNodeOrder; } void CopyValueToVirtualRegister(const Value *V, unsigned Reg); - /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten - /// from how the code appeared in the source. The ordering is used by the - /// scheduler to effectively turn off scheduling. - void AssignOrderingToNode(const SDNode *Node); - void visit(const Instruction &I); void visit(unsigned Opcode, const User &I); @@ -391,7 +599,7 @@ public: assert(N.getNode() == 0 && "Already set a value for this node!"); N = NewN; } - + void setUnusedArgValue(const Value *V, SDValue NewN) { SDValue &N = UnusedArgNodeMap[V]; assert(N.getNode() == 0 && "Already set a value for this node!"); @@ -412,6 +620,12 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); + std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI, + unsigned ArgIdx, + unsigned NumArgs, + SDValue Callee, + bool useVoidTy = false); + /// UpdateSplitBlock - When an MBB was split during scheduling, update the /// references that ned to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); @@ -453,6 +667,9 @@ private: public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); + void visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB); + void visitSPDescriptorFailure(StackProtectorDescriptor &SPD); void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, @@ -463,7 +680,7 @@ public: void visitJumpTable(JumpTable &JT); void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); - + private: // These all get lowered before this pass. void visitInvoke(const InvokeInst &I); @@ -504,6 +721,7 @@ private: void visitPtrToInt(const User &I); void visitIntToPtr(const User &I); void visitBitCast(const User &I); + void visitAddrSpaceCast(const User &I); void visitExtractElement(const User &I); void visitInsertElement(const User &I); @@ -525,6 +743,11 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitMemChrCall(const CallInst &I); + bool visitStrCpyCall(const CallInst &I, bool isStpcpy); + bool visitStrCmpCall(const CallInst &I); + bool visitStrLenCall(const CallInst &I); + bool visitStrNLenCall(const CallInst &I); bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); @@ -537,6 +760,8 @@ private: void visitVAArg(const VAArgInst &I); void visitVAEnd(const CallInst &I); void visitVACopy(const CallInst &I); + void visitStackmap(const CallInst &I); + void visitPatchpoint(const CallInst &I); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); @@ -545,10 +770,13 @@ private: llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } + void processIntegerCallValue(const Instruction &I, + SDValue Value, bool IsSigned); + void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); /// EmitFuncArgumentDbgValue - If V is an function argument then create - /// corresponding DBG_VALUE machine instruction for it now. At the end of + /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, int64_t Offset, const SDValue &N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 47b0391..c04a08d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -92,9 +92,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; - case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; - case ISD::LSDAADDR: return "LSDAADDR"; - case ISD::EHSELECTION: return "EHSELECTION"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; @@ -145,6 +142,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FROUND: return "fround"; case ISD::FEXP: return "fexp"; case ISD::FEXP2: return "fexp2"; case ISD::FLOG: return "flog"; @@ -226,6 +224,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; case ISD::BITCAST: return "bitcast"; + case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP32: return "fp16_to_fp32"; case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -487,10 +486,16 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " " << offset; if (unsigned int TF = BA->getTargetFlags()) OS << " [TF=" << TF << ']'; + } else if (const AddrSpaceCastSDNode *ASC = + dyn_cast<AddrSpaceCastSDNode>(this)) { + OS << '[' + << ASC->getSrcAddressSpace() + << " -> " + << ASC->getDestAddressSpace() + << ']'; } - if (G) - if (unsigned Order = G->GetOrdering(this)) + if (unsigned Order = getIROrder()) OS << " [ORD=" << Order << ']'; if (getNodeId() != -1) @@ -501,8 +506,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { DIScope Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); OS << " dbg:"; + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); // Omit the directory, since it's usually long and uninteresting. - if (Scope.Verify()) + if (Scope) OS << Scope.getFilename(); else OS << "<unknown>"; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9935626..3a0cfa1 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -222,23 +223,61 @@ defaultListDAGScheduler("default", "Best scheduler for the target", namespace llvm { //===--------------------------------------------------------------------===// + /// \brief This class is used by SelectionDAGISel to temporarily override + /// the optimization level on a per-function basis. + class OptLevelChanger { + SelectionDAGISel &IS; + CodeGenOpt::Level SavedOptLevel; + bool SavedFastISel; + + public: + OptLevelChanger(SelectionDAGISel &ISel, + CodeGenOpt::Level NewOptLevel) : IS(ISel) { + SavedOptLevel = IS.OptLevel; + if (NewOptLevel == SavedOptLevel) + return; + IS.OptLevel = NewOptLevel; + IS.TM.setOptLevel(NewOptLevel); + SavedFastISel = IS.TM.Options.EnableFastISel; + if (NewOptLevel == CodeGenOpt::None) + IS.TM.setFastISel(true); + DEBUG(dbgs() << "\nChanging optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel + << " ; After: -O" << NewOptLevel << "\n"); + } + + ~OptLevelChanger() { + if (IS.OptLevel == SavedOptLevel) + return; + DEBUG(dbgs() << "\nRestoring optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel + << " ; After: -O" << SavedOptLevel << "\n"); + IS.OptLevel = SavedOptLevel; + IS.TM.setOptLevel(SavedOptLevel); + IS.TM.setFastISel(SavedFastISel); + } + }; + + //===--------------------------------------------------------------------===// /// createDefaultScheduler - This creates an instruction scheduler appropriate /// for the target. ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetLowering &TLI = IS->getTargetLowering(); + const TargetLowering *TLI = IS->getTargetLowering(); const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>(); - if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() || - TLI.getSchedulingPreference() == Sched::Source) + if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() || + TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::RegPressure) + if (TLI->getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::Hybrid) + if (TLI->getSchedulingPreference() == Sched::Hybrid) return createHybridListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::VLIW) + if (TLI->getSchedulingPreference() == Sched::VLIW) return createVLIWDAGScheduler(IS, OptLevel); - assert(TLI.getSchedulingPreference() == Sched::ILP && + assert(TLI->getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); } @@ -275,10 +314,10 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : - MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), - FuncInfo(new FunctionLoweringInfo(TLI)), + MachineFunctionPass(ID), TM(tm), + FuncInfo(new FunctionLoweringInfo(TM)), CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), @@ -355,6 +394,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const Function &Fn = *mf.getFunction(); const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetLowering *TLI = TM.getTargetLowering(); MF = &mf; RegInfo = &MF->getRegInfo(); @@ -368,11 +408,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { ST.resetSubtargetFeatures(MF); TM.resetTargetOptions(MF); + // Reset OptLevel to None for optnone functions. + CodeGenOpt::Level NewOptLevel = OptLevel; + if (Fn.hasFnAttribute(Attribute::OptimizeNone)) + NewOptLevel = CodeGenOpt::None; + OptLevelChanger OLC(*this, NewOptLevel); + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); - CurDAG->init(*MF, TTI); + CurDAG->init(*MF, TTI, TLI); FuncInfo->set(Fn, *MF); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -401,29 +447,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Insert DBG_VALUE instructions for function arguments to the entry block. for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; - unsigned Reg = MI->getOperand(0).getReg(); + bool hasFI = MI->getOperand(0).isFI(); + unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); - MachineBasicBlock::iterator InsertPos = Def; - // FIXME: VR def may not be in entry block. - Def->getParent()->insert(llvm::next(InsertPos), MI); + if (Def) { + MachineBasicBlock::iterator InsertPos = Def; + // FIXME: VR def may not be in entry block. + Def->getParent()->insert(llvm::next(InsertPos), MI); + } else + DEBUG(dbgs() << "Dropping debug info for dead vreg" + << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg); if (LDI != LiveInMap.end()) { + assert(!hasFI && "There's no handling of frame pointer updating here yet " + "- add if needed"); MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getOperand(MI->getNumOperands()-1).getMetadata(); - unsigned Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->isIndirectDebugValue(); + unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; // Def is never a terminator here, so it is ok to increment InsertPos. BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(LDI->second, RegState::Debug) - .addImm(Offset).addMetadata(Variable); + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, + LDI->second, Offset, Variable); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -442,9 +496,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (CopyUseMI) { MachineInstr *NewMI = BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) - .addImm(Offset).addMetadata(Variable); + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, + CopyUseMI->getOperand(0).getReg(), + Offset, Variable); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); } @@ -491,6 +546,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (J == E) break; To = J->second; } + // Make sure the new register has a sufficiently constrained register class. + if (TargetRegisterInfo::isVirtualRegister(From) && + TargetRegisterInfo::isVirtualRegister(To)) + MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. MRI.replaceRegWith(From, To); } @@ -611,6 +670,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + CurDAG->NewNodesMustHaveLegalTypes = true; + if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); @@ -624,6 +685,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + } { @@ -790,9 +852,6 @@ void SelectionDAGISel::DoInstructionSelection() { continue; // Replace node. if (ResNode) { - // Propagate ordering - CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node)); - ReplaceUses(Node, ResNode); } @@ -827,12 +886,13 @@ void SelectionDAGISel::PrepareEHLandingPad() { .addSym(Label); // Mark exception register as live in. - const TargetRegisterClass *PtrRC = TLI.getRegClassFor(TLI.getPointerTy()); - if (unsigned Reg = TLI.getExceptionPointerRegister()) + const TargetLowering *TLI = getTargetLowering(); + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + if (unsigned Reg = TLI->getExceptionPointerRegister()) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. - if (unsigned Reg = TLI.getExceptionSelectorRegister()) + if (unsigned Reg = TLI->getExceptionSelectorRegister()) FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); } @@ -932,7 +992,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (TM.Options.EnableFastISel) - FastIS = TLI.createFastISel(*FuncInfo, LibInfo); + FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. ReversePostOrderTraversal<const Function*> RPOT(&Fn); @@ -1135,6 +1195,91 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { delete FastIS; SDB->clearDanglingDebugInfo(); + SDB->SPDescriptor.resetPerFunctionState(); +} + +/// Given that the input MI is before a partial terminator sequence TSeq, return +/// true if M + TSeq also a partial terminator sequence. +/// +/// A Terminator sequence is a sequence of MachineInstrs which at this point in +/// lowering copy vregs into physical registers, which are then passed into +/// terminator instructors so we can satisfy ABI constraints. A partial +/// terminator sequence is an improper subset of a terminator sequence (i.e. it +/// may be the whole terminator sequence). +static bool MIIsInTerminatorSequence(const MachineInstr *MI) { + // If we do not have a copy or an implicit def, we return true if and only if + // MI is a debug value. + if (!MI->isCopy() && !MI->isImplicitDef()) + // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the + // physical registers if there is debug info associated with the terminator + // of our mbb. We want to include said debug info in our terminator + // sequence, so we return true in that case. + return MI->isDebugValue(); + + // We have left the terminator sequence if we are not doing one of the + // following: + // + // 1. Copying a vreg into a physical register. + // 2. Copying a vreg into a vreg. + // 3. Defining a register via an implicit def. + + // OPI should always be a register definition... + MachineInstr::const_mop_iterator OPI = MI->operands_begin(); + if (!OPI->isReg() || !OPI->isDef()) + return false; + + // Defining any register via an implicit def is always ok. + if (MI->isImplicitDef()) + return true; + + // Grab the copy source... + MachineInstr::const_mop_iterator OPI2 = OPI; + ++OPI2; + assert(OPI2 != MI->operands_end() + && "Should have a copy implying we should have 2 arguments."); + + // Make sure that the copy dest is not a vreg when the copy source is a + // physical register. + if (!OPI2->isReg() || + (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && + TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) + return false; + + return true; +} + +/// Find the split point at which to splice the end of BB into its success stack +/// protector check machine basic block. +/// +/// On many platforms, due to ABI constraints, terminators, even before register +/// allocation, use physical registers. This creates an issue for us since +/// physical registers at this point can not travel across basic +/// blocks. Luckily, selectiondag always moves physical registers into vregs +/// when they enter functions and moves them through a sequence of copies back +/// into the physical registers right before the terminator creating a +/// ``Terminator Sequence''. This function is searching for the beginning of the +/// terminator sequence so that we can ensure that we splice off not just the +/// terminator, but additionally the copies that move the vregs into the +/// physical registers. +static MachineBasicBlock::iterator +FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) { + MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); + // + if (SplitPoint == BB->begin()) + return SplitPoint; + + MachineBasicBlock::iterator Start = BB->begin(); + MachineBasicBlock::iterator Previous = SplitPoint; + --Previous; + + while (MIIsInTerminatorSequence(Previous)) { + SplitPoint = Previous; + if (Previous == Start) + break; + --Previous; + } + + return SplitPoint; } void @@ -1147,11 +1292,13 @@ SelectionDAGISel::FinishBasicBlock() { << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); + const bool MustUpdatePHINodes = SDB->SwitchCases.empty() && + SDB->JTCases.empty() && + SDB->BitTestCases.empty(); + // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. - if (SDB->SwitchCases.empty() && - SDB->JTCases.empty() && - SDB->BitTestCases.empty()) { + if (MustUpdatePHINodes) { for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); assert(PHI->isPHI() && @@ -1160,9 +1307,54 @@ SelectionDAGISel::FinishBasicBlock() { continue; PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } - return; } + // Handle stack protector. + if (SDB->SPDescriptor.shouldEmitStackProtector()) { + MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); + MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB(); + + // Find the split point to split the parent mbb. At the same time copy all + // physical registers used in the tail of parent mbb into virtual registers + // before the split point and back into physical registers after the split + // point. This prevents us needing to deal with Live-ins and many other + // register allocation issues caused by us splitting the parent mbb. The + // register allocator will clean up said virtual copies later on. + MachineBasicBlock::iterator SplitPoint = + FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc()); + + // Splice the terminator of ParentMBB into SuccessMBB. + SuccessMBB->splice(SuccessMBB->end(), ParentMBB, + SplitPoint, + ParentMBB->end()); + + // Add compare/jump on neq/jump to the parent BB. + FuncInfo->MBB = ParentMBB; + FuncInfo->InsertPt = ParentMBB->end(); + SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // CodeGen Failure MBB if we have not codegened it yet. + MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); + if (!FailureMBB->size()) { + FuncInfo->MBB = FailureMBB; + FuncInfo->InsertPt = FailureMBB->end(); + SDB->visitSPDescriptorFailure(SDB->SPDescriptor); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Clear the Per-BB State. + SDB->SPDescriptor.resetPerBBState(); + } + + // If we updated PHI Nodes, return early. + if (MustUpdatePHINodes) + return; + for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { @@ -1609,7 +1801,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { SelectInlineAsmMemoryOperands(Ops); EVT VTs[] = { MVT::Other, MVT::Glue }; - SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, &Ops[0], Ops.size()); New->setNodeId(-1); return New.getNode(); @@ -1881,10 +2073,9 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, } } - SDValue Res; if (InputChains.size() == 1) return InputChains[0]; - return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(), + return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), MVT::Other, &InputChains[0], InputChains.size()); } @@ -1957,6 +2148,18 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N == RecordedNodes[RecNo].first; } +/// CheckChildSame - Implements OP_CheckChildXSame. +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo), + RecordedNodes); +} + /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, @@ -1981,24 +2184,23 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering &TLI) { + SDValue N, const TargetLowering *TLI) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (N.getValueType() == VT) return true; // Handle the case when VT is iPTR. - return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy(); + return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering &TLI, + SDValue N, const TargetLowering *TLI, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) return false; // Match fails if out of range child #. return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); } - LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { @@ -2008,13 +2210,13 @@ CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering &TLI) { + SDValue N, const TargetLowering *TLI) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (cast<VTSDNode>(N)->getVT() == VT) return true; // Handle the case when VT is iPTR. - return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy(); + return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2072,6 +2274,13 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckSame: Result = !::CheckSame(Table, Index, N, RecordedNodes); return Index; + case SelectionDAGISel::OPC_CheckChild0Same: + case SelectionDAGISel::OPC_CheckChild1Same: + case SelectionDAGISel::OPC_CheckChild2Same: + case SelectionDAGISel::OPC_CheckChild3Same: + Result = !::CheckChildSame(Table, Index, N, RecordedNodes, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same); + return Index; case SelectionDAGISel::OPC_CheckPatternPredicate: Result = !::CheckPatternPredicate(Table, Index, SDISel); return Index; @@ -2082,7 +2291,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Result = !::CheckOpcode(Table, Index, N.getNode()); return Index; case SelectionDAGISel::OPC_CheckType: - Result = !::CheckType(Table, Index, N, SDISel.TLI); + Result = !::CheckType(Table, Index, N, SDISel.getTargetLowering()); return Index; case SelectionDAGISel::OPC_CheckChild0Type: case SelectionDAGISel::OPC_CheckChild1Type: @@ -2092,14 +2301,14 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckChild5Type: case SelectionDAGISel::OPC_CheckChild6Type: case SelectionDAGISel::OPC_CheckChild7Type: - Result = !::CheckChildType(Table, Index, N, SDISel.TLI, + Result = !::CheckChildType(Table, Index, N, SDISel.getTargetLowering(), Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type); return Index; case SelectionDAGISel::OPC_CheckCondCode: Result = !::CheckCondCode(Table, Index, N); return Index; case SelectionDAGISel::OPC_CheckValueType: - Result = !::CheckValueType(Table, Index, N, SDISel.TLI); + Result = !::CheckValueType(Table, Index, N, SDISel.getTargetLowering()); return Index; case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); @@ -2369,6 +2578,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckSame: if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; continue; + + case OPC_CheckChild0Same: case OPC_CheckChild1Same: + case OPC_CheckChild2Same: case OPC_CheckChild3Same: + if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes, + Opcode-OPC_CheckChild0Same)) + break; + continue; + case OPC_CheckPatternPredicate: if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; continue; @@ -2392,7 +2609,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; case OPC_CheckType: - if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; + if (!::CheckType(MatcherTable, MatcherIndex, N, getTargetLowering())) + break; continue; case OPC_SwitchOpcode: { @@ -2427,7 +2645,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } case OPC_SwitchType: { - MVT CurNodeVT = N.getValueType().getSimpleVT(); + MVT CurNodeVT = N.getSimpleValueType(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; unsigned CaseSize; while (1) { @@ -2439,7 +2657,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) - CaseVT = TLI.getPointerTy(); + CaseVT = getTargetLowering()->getPointerTy(); // If the VT matches, then we will execute this case. if (CurNodeVT == CaseVT) @@ -2461,7 +2679,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckChild2Type: case OPC_CheckChild3Type: case OPC_CheckChild4Type: case OPC_CheckChild5Type: case OPC_CheckChild6Type: case OPC_CheckChild7Type: - if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, + if (!::CheckChildType(MatcherTable, MatcherIndex, N, getTargetLowering(), Opcode-OPC_CheckChild0Type)) break; continue; @@ -2469,7 +2687,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; continue; case OPC_CheckValueType: - if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break; + if (!::CheckValueType(MatcherTable, MatcherIndex, N, getTargetLowering())) + break; continue; case OPC_CheckInteger: if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; @@ -2538,7 +2757,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget"); SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { @@ -2563,7 +2782,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Read all of the chained nodes. unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); // FIXME: What if other value results of the node have uses not matched @@ -2600,7 +2819,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Read all of the chained nodes. for (unsigned i = 0; i != NumChains; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); // FIXME: What if other value results of the node have uses not matched @@ -2627,13 +2846,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitCopyToReg: { unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; if (InputChain.getNode() == 0) InputChain = CurDAG->getEntryNode(); - InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(), + InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch), DestPhysReg, RecordedNodes[RecNo].first, InputGlue); @@ -2644,7 +2863,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitNodeXForm: { unsigned XFormNo = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm"); SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0)); continue; @@ -2661,7 +2880,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0; i != NumVTs; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; - if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy; + if (VT == MVT::iPTR) VT = getTargetLowering()->getPointerTy().SimpleTy; VTs.push_back(VT); } @@ -2720,7 +2939,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Opcode != OPC_MorphNodeTo) { // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. - Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), + Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch), VTList, Ops); // Add all the non-glue/non-chain results to the RecordedNodes list. @@ -2763,8 +2982,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, bool mayStore = MCID.mayStore(); unsigned NumMemRefs = 0; - for (SmallVector<MachineMemOperand*, 2>::const_iterator I = - MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { if ((*I)->isLoad()) { if (mayLoad) ++NumMemRefs; @@ -2780,8 +2999,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MF->allocateMemRefsArray(NumMemRefs); MachineSDNode::mmo_iterator MemRefsPos = MemRefs; - for (SmallVector<MachineMemOperand*, 2>::const_iterator I = - MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { if ((*I)->isLoad()) { if (mayLoad) *MemRefsPos++ = *I; @@ -2821,7 +3040,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (RecNo & 128) RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults"); GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); } continue; @@ -2838,7 +3057,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (ResSlot & 128) ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); - assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); + assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch"); SDValue Res = RecordedNodes[ResSlot].first; assert(i < NodeToMatch->getNumValues() && diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f5fc66c..82b068d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -64,13 +64,29 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return isUsedByReturnOnly(Node, Chain); } +/// \brief Set CallLoweringInfo attribute flags based on a call instruction +/// and called function attributes. +void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, + unsigned AttrIdx) { + isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); + isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); + isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); + isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); + isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); + isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + Alignment = CS->getParamAlignment(AttrIdx); +} /// Generate a libcall taking the given operands as arguments and returning a /// result of type RetVT. -SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, - RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, - bool isSigned, DebugLoc dl) const { +std::pair<SDValue, SDValue> +TargetLowering::makeLibCall(SelectionDAG &DAG, + RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, SDLoc dl, + bool doesNotReturn, + bool isReturnValueUsed) const { TargetLowering::ArgListTy Args; Args.reserve(NumOps); @@ -89,11 +105,9 @@ SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI); - - return CallInfo.first; + doesNotReturn, isReturnValueUsed, Callee, Args, + DAG, dl); + return LowerCallTo(CLI); } @@ -102,7 +116,7 @@ SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - DebugLoc dl) const { + SDLoc dl) const { assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) && "Unsupported setcc type!"); @@ -183,14 +197,18 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparions lib calls. EVT RetVT = getCmpLibcallReturnType(); SDValue Ops[2] = { NewLHS, NewRHS }; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, + dl).first; NewRHS = DAG.getConstant(0, RetVT); CCCode = getCmpLibcallCC(LC1); if (LC2 != RTLIB::UNKNOWN_LIBCALL) { - SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), + SDValue Tmp = DAG.getNode(ISD::SETCC, dl, + getSetCCResultType(*DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl); - NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), NewLHS, + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, + dl).first; + NewLHS = DAG.getNode(ISD::SETCC, dl, + getSetCCResultType(*DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); NewRHS = SDValue(); @@ -262,7 +280,7 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// constant and return true. bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // FIXME: ISD::SELECT, ISD::SELECT_CC switch (Op.getOpcode()) { @@ -302,7 +320,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, - DebugLoc dl) { + SDLoc dl) { assert(Op.getNumOperands() == 2 && "ShrinkDemandedOp only supports binary operators!"); assert(Op.getNode()->getNumValues() == 1 && @@ -356,7 +374,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth && "Mask size mismatches value type size!"); APInt NewMask = DemandedMask; - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Don't know anything. KnownZero = KnownOne = APInt(BitWidth, 0); @@ -508,7 +526,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 // NB: it is okay if more bits are known than are requested - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side if (KnownOne == KnownOne2) { // set bits are the same on both sides EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); @@ -630,6 +648,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), NarrowShl)); } + // Repeat the SHL optimization above in cases where an extension + // intervenes: (shl (anyext (shr x, c1)), c2) to + // (shl (anyext x), c2-c1). This requires that the bottom c1 bits + // aren't demanded (as above) and that the shifted upper c1 bits of + // x aren't demanded. + if (InOp.hasOneUse() && + InnerOp.getOpcode() == ISD::SRL && + InnerOp.hasOneUse() && + isa<ConstantSDNode>(InnerOp.getOperand(1))) { + uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1)) + ->getZExtValue(); + if (InnerShAmt < ShAmt && + InnerShAmt < InnerBits && + NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 && + NewMask.trunc(ShAmt) == 0) { + SDValue NewSA = + TLO.DAG.getConstant(ShAmt - InnerShAmt, + Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, + InnerOp.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, + NewExt, NewSA)); + } + } } KnownZero <<= SA->getZExtValue(); @@ -720,13 +763,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), Op.getOperand(1))); - } else if (KnownOne.intersects(SignBit)) { // New bits are known one. - KnownOne |= HighBits; + + int Log2 = NewMask.exactLogBase2(); + if (Log2 >= 0) { + // The bit must come from the sign. + SDValue NewSA = + TLO.DAG.getConstant(BitWidth - 1 - Log2, + Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + Op.getOperand(0), NewSA)); } + + if (KnownOne.intersects(SignBit)) + // New bits are known one. + KnownOne |= HighBits; } break; case ISD::SIGN_EXTEND_INREG: { @@ -1066,7 +1120,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, - DAGCombinerInfo &DCI, DebugLoc dl) const { + DAGCombinerInfo &DCI, SDLoc dl) const { SelectionDAG &DAG = DCI.DAG; // These setcc operations always fold. @@ -1075,13 +1129,20 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETFALSE: case ISD::SETFALSE2: return DAG.getConstant(0, VT); case ISD::SETTRUE: - case ISD::SETTRUE2: return DAG.getConstant(1, VT); + case ISD::SETTRUE2: { + TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector()); + return DAG.getConstant( + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + } } // Ensure that the constant occurs on the RHS, and fold constant // comparisons. - if (isa<ConstantSDNode>(N0.getNode())) - return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); + if (isa<ConstantSDNode>(N0.getNode()) && + (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) + return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -1160,7 +1221,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Make sure we're not losing bits from the constant. - if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) { + if (MinBits > 0 && + MinBits < C1.getBitWidth() && MinBits >= C1.getActiveBits()) { EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { // Will get folded away. @@ -1175,6 +1237,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. if (DCI.isBeforeLegalize() && + !ISD::isSignedIntSetCC(Cond) && N0.getOpcode() == ISD::AND && C1 == 0 && N0.getNode()->hasOneUse() && isa<LoadSDNode>(N0.getOperand(0)) && @@ -1319,7 +1382,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType().isInteger()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } if ((N0.getOpcode() == ISD::XOR || @@ -1756,16 +1821,22 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { if (ValueHasExactlyOneBitSet(N1, DAG)) { Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - SDValue Zero = DAG.getConstant(0, N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, Zero, Cond); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N0.getSimpleValueType())) { + SDValue Zero = DAG.getConstant(0, N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, Zero, Cond); + } } } if (N1.getOpcode() == ISD::AND) if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { if (ValueHasExactlyOneBitSet(N0, DAG)) { Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - SDValue Zero = DAG.getConstant(0, N0.getValueType()); - return DAG.getSetCC(dl, VT, N1, Zero, Cond); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N1.getSimpleValueType())) { + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N1, Zero, Cond); + } } } } @@ -1966,7 +2037,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, int64_t Offs = GA->getOffset(); if (C) Offs += C->getZExtValue(); Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), - C ? C->getDebugLoc() : DebugLoc(), + C ? SDLoc(C) : SDLoc(), Op.getValueType(), Offs)); return; } @@ -1989,8 +2060,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - if (Constraint[0] != '{') + MVT VT) const { + if (Constraint.empty() || Constraint[0] != '{') return std::make_pair(0u, static_cast<TargetRegisterClass*>(0)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); @@ -2139,8 +2210,9 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( break; } } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { - OpInfo.ConstraintVT = MVT::getIntegerVT( - 8*getDataLayout()->getPointerSize(PT->getAddressSpace())); + unsigned PtrSize + = getDataLayout()->getPointerSizeInBits(PT->getAddressSpace()); + OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize); } else { OpInfo.ConstraintVT = MVT::getVT(OpTy, true); } @@ -2435,9 +2507,9 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, } } -/// BuildExactDiv - Given an exact SDIV by a constant, create a multiplication +/// \brief Given an exact SDIV by a constant, create a multiplication /// with the multiplicative inverse of the constant. -SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, +SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, SelectionDAG &DAG) const { ConstantSDNode *C = cast<ConstantSDNode>(Op2); APInt d = C->getAPIntValue(); @@ -2461,7 +2533,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); } -/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, +/// \brief Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> @@ -2469,7 +2541,7 @@ SDValue TargetLowering:: BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, std::vector<SDNode*> *Created) const { EVT VT = N->getValueType(0); - DebugLoc dl= N->getDebugLoc(); + SDLoc dl(N); // Check to see if we can do this. // FIXME: We should be more aggressive here. @@ -2521,7 +2593,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, return DAG.getNode(ISD::ADD, dl, VT, Q, T); } -/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, +/// \brief Given an ISD::UDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> @@ -2529,7 +2601,7 @@ SDValue TargetLowering:: BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, std::vector<SDNode*> *Created) const { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Check to see if we can do this. // FIXME: We should be more aggressive here. diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp deleted file mode 100644 index 2feea59..0000000 --- a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp +++ /dev/null @@ -1,1152 +0,0 @@ -//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a shrink wrapping variant of prolog/epilog insertion: -// - Spills and restores of callee-saved registers (CSRs) are placed in the -// machine CFG to tightly surround their uses so that execution paths that -// do not use CSRs do not pay the spill/restore penalty. -// -// - Avoiding placment of spills/restores in loops: if a CSR is used inside a -// loop the spills are placed in the loop preheader, and restores are -// placed in the loop exit nodes (the successors of loop _exiting_ nodes). -// -// - Covering paths without CSR uses: -// If a region in a CFG uses CSRs and has multiple entry and/or exit points, -// the use info for the CSRs inside the region is propagated outward in the -// CFG to ensure validity of the spill/restore placements. This decreases -// the effectiveness of shrink wrapping but does not require edge splitting -// in the machine CFG. -// -// This shrink wrapping implementation uses an iterative analysis to determine -// which basic blocks require spills and restores for CSRs. -// -// This pass uses MachineDominators and MachineLoopInfo. Loop information -// is used to prevent placement of callee-saved register spills/restores -// in the bodies of loops. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "shrink-wrap" - -#include "PrologEpilogInserter.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include <sstream> - -using namespace llvm; - -STATISTIC(numSRReduced, "Number of CSR spills+restores reduced."); - -// Shrink Wrapping: -static cl::opt<bool> -ShrinkWrapping("shrink-wrap", - cl::desc("Shrink wrap callee-saved register spills/restores")); - -// Shrink wrap only the specified function, a debugging aid. -static cl::opt<std::string> -ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, - cl::desc("Shrink wrap the specified function"), - cl::value_desc("funcname"), - cl::init("")); - -// Debugging level for shrink wrapping. -enum ShrinkWrapDebugLevel { - Disabled, BasicInfo, Iterations, Details -}; - -static cl::opt<enum ShrinkWrapDebugLevel> -ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, - cl::desc("Print shrink wrapping debugging information"), - cl::values( - clEnumVal(Disabled , "disable debug output"), - clEnumVal(BasicInfo , "print basic DF sets"), - clEnumVal(Iterations, "print SR sets for each iteration"), - clEnumVal(Details , "print all DF sets"), - clEnumValEnd)); - - -void PEI::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - if (ShrinkWrapping || ShrinkWrapFunc != "") { - AU.addRequired<MachineLoopInfo>(); - AU.addRequired<MachineDominatorTree>(); - } - AU.addPreserved<MachineLoopInfo>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<TargetPassConfig>(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -//===----------------------------------------------------------------------===// -// ShrinkWrapping implementation -//===----------------------------------------------------------------------===// - -// Convienences for dealing with machine loops. -MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) { - assert(LP && "Machine loop is NULL."); - MachineBasicBlock* PHDR = LP->getLoopPreheader(); - MachineLoop* PLP = LP->getParentLoop(); - while (PLP) { - PHDR = PLP->getLoopPreheader(); - PLP = PLP->getParentLoop(); - } - return PHDR; -} - -MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { - if (LP == 0) - return 0; - MachineLoop* PLP = LP->getParentLoop(); - while (PLP) { - LP = PLP; - PLP = PLP->getParentLoop(); - } - return LP; -} - -bool PEI::isReturnBlock(MachineBasicBlock* MBB) { - return (MBB && !MBB->empty() && MBB->back().isReturn()); -} - -// Initialize shrink wrapping DFA sets, called before iterations. -void PEI::clearAnticAvailSets() { - AnticIn.clear(); - AnticOut.clear(); - AvailIn.clear(); - AvailOut.clear(); -} - -// Clear all sets constructed by shrink wrapping. -void PEI::clearAllSets() { - ReturnBlocks.clear(); - clearAnticAvailSets(); - UsedCSRegs.clear(); - CSRUsed.clear(); - TLLoops.clear(); - CSRSave.clear(); - CSRRestore.clear(); -} - -// Initialize all shrink wrapping data. -void PEI::initShrinkWrappingInfo() { - clearAllSets(); - EntryBlock = 0; -#ifndef NDEBUG - HasFastExitPath = false; -#endif - ShrinkWrapThisFunction = ShrinkWrapping; - // DEBUG: enable or disable shrink wrapping for the current function - // via --shrink-wrap-func=<funcname>. -#ifndef NDEBUG - if (ShrinkWrapFunc != "") { - std::string MFName = MF->getName().str(); - ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); - } -#endif -} - - -/// placeCSRSpillsAndRestores - determine which MBBs of the function -/// need save, restore code for callee-saved registers by doing a DF analysis -/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs -/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo -/// is used to ensure that CSR save/restore code is not placed inside loops. -/// This function computes the maps of MBBs -> CSRs to spill and restore -/// in CSRSave, CSRRestore. -/// -/// If shrink wrapping is not being performed, place all spills in -/// the entry block, all restores in return blocks. In this case, -/// CSRSave has a single mapping, CSRRestore has mappings for each -/// return block. -/// -void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { - - DEBUG(MF = &Fn); - - initShrinkWrappingInfo(); - - DEBUG(if (ShrinkWrapThisFunction) { - dbgs() << "Place CSR spills/restores for " - << MF->getName() << "\n"; - }); - - if (calculateSets(Fn)) - placeSpillsAndRestores(Fn); -} - -/// calcAnticInOut - calculate the anticipated in/out reg sets -/// for the given MBB by looking forward in the MCFG at MBB's -/// successors. -/// -bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { - bool changed = false; - - // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) - SmallVector<MachineBasicBlock*, 4> successors; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC != MBB) - successors.push_back(SUCC); - } - - unsigned i = 0, e = successors.size(); - if (i != e) { - CSRegSet prevAnticOut = AnticOut[MBB]; - MachineBasicBlock* SUCC = successors[i]; - - AnticOut[MBB] = AnticIn[SUCC]; - for (++i; i != e; ++i) { - SUCC = successors[i]; - AnticOut[MBB] &= AnticIn[SUCC]; - } - if (prevAnticOut != AnticOut[MBB]) - changed = true; - } - - // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); - CSRegSet prevAnticIn = AnticIn[MBB]; - AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; - if (prevAnticIn != AnticIn[MBB]) - changed = true; - return changed; -} - -/// calcAvailInOut - calculate the available in/out reg sets -/// for the given MBB by looking backward in the MCFG at MBB's -/// predecessors. -/// -bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { - bool changed = false; - - // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) - SmallVector<MachineBasicBlock*, 4> predecessors; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED != MBB) - predecessors.push_back(PRED); - } - - unsigned i = 0, e = predecessors.size(); - if (i != e) { - CSRegSet prevAvailIn = AvailIn[MBB]; - MachineBasicBlock* PRED = predecessors[i]; - - AvailIn[MBB] = AvailOut[PRED]; - for (++i; i != e; ++i) { - PRED = predecessors[i]; - AvailIn[MBB] &= AvailOut[PRED]; - } - if (prevAvailIn != AvailIn[MBB]) - changed = true; - } - - // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); - CSRegSet prevAvailOut = AvailOut[MBB]; - AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; - if (prevAvailOut != AvailOut[MBB]) - changed = true; - return changed; -} - -/// calculateAnticAvail - build the sets anticipated and available -/// registers in the MCFG of the current function iteratively, -/// doing a combined forward and backward analysis. -/// -void PEI::calculateAnticAvail(MachineFunction &Fn) { - // Initialize data flow sets. - clearAnticAvailSets(); - - // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. - bool changed = true; - unsigned iterations = 0; - while (changed) { - changed = false; - ++iterations; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - - // Calculate anticipated in, out regs at MBB from - // anticipated at successors of MBB. - changed |= calcAnticInOut(MBB); - - // Calculate available in, out regs at MBB from - // available at predecessors of MBB. - changed |= calcAvailInOut(MBB); - } - } - - DEBUG({ - if (ShrinkWrapDebugging >= Details) { - dbgs() - << "-----------------------------------------------------------\n" - << " Antic/Avail Sets:\n" - << "-----------------------------------------------------------\n" - << "iterations = " << iterations << "\n" - << "-----------------------------------------------------------\n" - << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" - << "-----------------------------------------------------------\n"; - - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets(MBB); - } - - dbgs() - << "-----------------------------------------------------------\n"; - } - }); -} - -/// propagateUsesAroundLoop - copy used register info from MBB to all blocks -/// of the loop given by LP and its parent loops. This prevents spills/restores -/// from being placed in the bodies of loops. -/// -void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { - if (! MBB || !LP) - return; - - std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); - for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { - MachineBasicBlock* LBB = loopBlocks[i]; - if (LBB == MBB) - continue; - if (CSRUsed[LBB].contains(CSRUsed[MBB])) - continue; - CSRUsed[LBB] |= CSRUsed[MBB]; - } -} - -/// calculateSets - collect the CSRs used in this function, compute -/// the DF sets that describe the initial minimal regions in the -/// Machine CFG around which CSR spills and restores must be placed. -/// -/// Additionally, this function decides if shrink wrapping should -/// be disabled for the current function, checking the following: -/// 1. the current function has more than 500 MBBs: heuristic limit -/// on function size to reduce compile time impact of the current -/// iterative algorithm. -/// 2. all CSRs are used in the entry block. -/// 3. all CSRs are used in all immediate successors of the entry block. -/// 4. all CSRs are used in a subset of blocks, each of which dominates -/// all return blocks. These blocks, taken as a subgraph of the MCFG, -/// are equivalent to the entry block since all execution paths pass -/// through them. -/// -bool PEI::calculateSets(MachineFunction &Fn) { - // Sets used to compute spill, restore placement sets. - const std::vector<CalleeSavedInfo> CSI = - Fn.getFrameInfo()->getCalleeSavedInfo(); - - // If no CSRs used, we are done. - if (CSI.empty()) { - DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getName() - << ": uses no callee-saved registers\n"); - return false; - } - - // Save refs to entry and return blocks. - EntryBlock = Fn.begin(); - for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); - MBB != E; ++MBB) - if (isReturnBlock(MBB)) - ReturnBlocks.push_back(MBB); - - // Determine if this function has fast exit paths. - DEBUG(if (ShrinkWrapThisFunction) - findFastExitPath()); - - // Limit shrink wrapping via the current iterative bit vector - // implementation to functions with <= 500 MBBs. - if (Fn.size() > 500) { - DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getName() - << ": too large (" << Fn.size() << " MBBs)\n"); - ShrinkWrapThisFunction = false; - } - - // Return now if not shrink wrapping. - if (! ShrinkWrapThisFunction) - return false; - - // Collect set of used CSRs. - for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { - UsedCSRegs.set(inx); - } - - // Walk instructions in all MBBs, create CSRUsed[] sets, choose - // whether or not to shrink wrap this function. - MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); - MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); - - bool allCSRUsesInEntryBlock = true; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { - for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { - unsigned Reg = CSI[inx].getReg(); - // If instruction I reads or modifies Reg, add it to UsedCSRegs, - // CSRUsed map for the current block. - for (unsigned opInx = 0, opEnd = I->getNumOperands(); - opInx != opEnd; ++opInx) { - const MachineOperand &MO = I->getOperand(opInx); - if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) - continue; - unsigned MOReg = MO.getReg(); - if (!MOReg) - continue; - if (MOReg == Reg || - (TargetRegisterInfo::isPhysicalRegister(MOReg) && - TargetRegisterInfo::isPhysicalRegister(Reg) && - TRI->isSubRegister(Reg, MOReg))) { - // CSR Reg is defined/used in block MBB. - CSRUsed[MBB].set(inx); - // Check for uses in EntryBlock. - if (MBB != EntryBlock) - allCSRUsesInEntryBlock = false; - } - } - } - } - - if (CSRUsed[MBB].empty()) - continue; - - // Propagate CSRUsed[MBB] in loops - if (MachineLoop* LP = LI.getLoopFor(MBB)) { - // Add top level loop to work list. - MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); - MachineLoop* PLP = getTopLevelLoopParent(LP); - - if (! HDR) { - HDR = PLP->getHeader(); - assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); - MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); - HDR = *PI; - } - TLLoops[HDR] = PLP; - - // Push uses from inside loop to its parent loops, - // or to all other MBBs in its loop. - if (LP->getLoopDepth() > 1) { - for (MachineLoop* PLP = LP->getParentLoop(); PLP; - PLP = PLP->getParentLoop()) { - propagateUsesAroundLoop(MBB, PLP); - } - } else { - propagateUsesAroundLoop(MBB, LP); - } - } - } - - if (allCSRUsesInEntryBlock) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in EntryBlock\n"); - ShrinkWrapThisFunction = false; - } else { - bool allCSRsUsedInEntryFanout = true; - for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), - SE = EntryBlock->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (CSRUsed[SUCC] != UsedCSRegs) - allCSRsUsedInEntryFanout = false; - } - if (allCSRsUsedInEntryFanout) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in imm successors of EntryBlock\n"); - ShrinkWrapThisFunction = false; - } - } - - if (ShrinkWrapThisFunction) { - // Check if MBB uses CSRs and dominates all exit nodes. - // Such nodes are equiv. to the entry node w.r.t. - // CSR uses: every path through the function must - // pass through this node. If each CSR is used at least - // once by these nodes, shrink wrapping is disabled. - CSRegSet CSRUsedInChokePoints; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) - continue; - bool dominatesExitNodes = true; - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) - if (! DT.dominates(MBB, ReturnBlocks[ri])) { - dominatesExitNodes = false; - break; - } - if (dominatesExitNodes) { - CSRUsedInChokePoints |= CSRUsed[MBB]; - if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in choke point(s) at " - << getBasicBlockName(MBB) << "\n"); - ShrinkWrapThisFunction = false; - break; - } - } - } - } - - // Return now if we have decided not to apply shrink wrapping - // to the current function. - if (! ShrinkWrapThisFunction) - return false; - - DEBUG({ - dbgs() << "ENABLED: " << Fn.getName(); - if (HasFastExitPath) - dbgs() << " (fast exit path)"; - dbgs() << "\n"; - if (ShrinkWrapDebugging >= BasicInfo) { - dbgs() << "------------------------------" - << "-----------------------------\n"; - dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; - if (ShrinkWrapDebugging >= Details) { - dbgs() << "------------------------------" - << "-----------------------------\n"; - dumpAllUsed(); - } - } - }); - - // Build initial DF sets to determine minimal regions in the - // Machine CFG around which CSRs must be spilled and restored. - calculateAnticAvail(Fn); - - return true; -} - -/// addUsesForMEMERegion - add uses of CSRs spilled or restored in -/// multi-entry, multi-exit (MEME) regions so spill and restore -/// placement will not break code that enters or leaves a -/// shrink-wrapped region by inducing spills with no matching -/// restores or restores with no matching spills. A MEME region -/// is a subgraph of the MCFG with multiple entry edges, multiple -/// exit edges, or both. This code propagates use information -/// through the MCFG until all paths requiring spills and restores -/// _outside_ the computed minimal placement regions have been covered. -/// -bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4>& blks) { - if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { - bool processThisBlock = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC->pred_size() > 1) { - processThisBlock = true; - break; - } - } - if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED->succ_size() > 1) { - processThisBlock = true; - break; - } - } - } - if (! processThisBlock) - return false; - } - - CSRegSet prop; - if (!CSRSave[MBB].empty()) - prop = CSRSave[MBB]; - else if (!CSRRestore[MBB].empty()) - prop = CSRRestore[MBB]; - else - prop = CSRUsed[MBB]; - if (prop.empty()) - return false; - - // Propagate selected bits to successors, predecessors of MBB. - bool addedUses = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - // Self-loop - if (SUCC == MBB) - continue; - if (! CSRUsed[SUCC].contains(prop)) { - CSRUsed[SUCC] |= prop; - addedUses = true; - blks.push_back(SUCC); - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(prop) << ")->" - << "successor " << getBasicBlockName(SUCC) << "\n"); - } - } - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - // Self-loop - if (PRED == MBB) - continue; - if (! CSRUsed[PRED].contains(prop)) { - CSRUsed[PRED] |= prop; - addedUses = true; - blks.push_back(PRED); - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(prop) << ")->" - << "predecessor " << getBasicBlockName(PRED) << "\n"); - } - } - return addedUses; -} - -/// addUsesForTopLevelLoops - add uses for CSRs used inside top -/// level loops to the exit blocks of those loops. -/// -bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { - bool addedUses = false; - - // Place restores for top level loops where needed. - for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator - I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { - MachineBasicBlock* MBB = I->first; - MachineLoop* LP = I->second; - MachineBasicBlock* HDR = LP->getHeader(); - SmallVector<MachineBasicBlock*, 4> exitBlocks; - CSRegSet loopSpills; - - loopSpills = CSRSave[MBB]; - if (CSRSave[MBB].empty()) { - loopSpills = CSRUsed[HDR]; - assert(!loopSpills.empty() && "No CSRs used in loop?"); - } else if (CSRRestore[MBB].contains(CSRSave[MBB])) - continue; - - LP->getExitBlocks(exitBlocks); - assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); - for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { - MachineBasicBlock* EXB = exitBlocks[i]; - if (! CSRUsed[EXB].contains(loopSpills)) { - CSRUsed[EXB] |= loopSpills; - addedUses = true; - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << "LOOP " << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(loopSpills) << ")->" - << getBasicBlockName(EXB) << "\n"); - if (EXB->succ_size() > 1 || EXB->pred_size() > 1) - blks.push_back(EXB); - } - } - } - return addedUses; -} - -/// calcSpillPlacements - determine which CSRs should be spilled -/// in MBB using AnticIn sets of MBB's predecessors, keeping track -/// of changes to spilled reg sets. Add MBB to the set of blocks -/// that need to be processed for propagating use info to cover -/// multi-entry/exit regions. -/// -bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, - CSRegBlockMap &prevSpills) { - bool placedSpills = false; - // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) - CSRegSet anticInPreds; - SmallVector<MachineBasicBlock*, 4> predecessors; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED != MBB) - predecessors.push_back(PRED); - } - unsigned i = 0, e = predecessors.size(); - if (i != e) { - MachineBasicBlock* PRED = predecessors[i]; - anticInPreds = UsedCSRegs - AnticIn[PRED]; - for (++i; i != e; ++i) { - PRED = predecessors[i]; - anticInPreds &= (UsedCSRegs - AnticIn[PRED]); - } - } else { - // Handle uses in entry blocks (which have no predecessors). - // This is necessary because the DFA formulation assumes the - // entry and (multiple) exit nodes cannot have CSR uses, which - // is not the case in the real world. - anticInPreds = UsedCSRegs; - } - // Compute spills required at MBB: - CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; - - if (! CSRSave[MBB].empty()) { - if (MBB == EntryBlock) { - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) - CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; - } else { - // Reset all regs spilled in MBB that are also spilled in EntryBlock. - if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { - CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; - } - } - } - placedSpills = (CSRSave[MBB] != prevSpills[MBB]); - prevSpills[MBB] = CSRSave[MBB]; - // Remember this block for adding restores to successor - // blocks for multi-entry region. - if (placedSpills) - blks.push_back(MBB); - - DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) - dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]) << "\n"); - - return placedSpills; -} - -/// calcRestorePlacements - determine which CSRs should be restored -/// in MBB using AvailOut sets of MBB's succcessors, keeping track -/// of changes to restored reg sets. Add MBB to the set of blocks -/// that need to be processed for propagating use info to cover -/// multi-entry/exit regions. -/// -bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, - SmallVector<MachineBasicBlock*, 4> &blks, - CSRegBlockMap &prevRestores) { - bool placedRestores = false; - // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) - CSRegSet availOutSucc; - SmallVector<MachineBasicBlock*, 4> successors; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC != MBB) - successors.push_back(SUCC); - } - unsigned i = 0, e = successors.size(); - if (i != e) { - MachineBasicBlock* SUCC = successors[i]; - availOutSucc = UsedCSRegs - AvailOut[SUCC]; - for (++i; i != e; ++i) { - SUCC = successors[i]; - availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); - } - } else { - if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { - // Handle uses in return blocks (which have no successors). - // This is necessary because the DFA formulation assumes the - // entry and (multiple) exit nodes cannot have CSR uses, which - // is not the case in the real world. - availOutSucc = UsedCSRegs; - } - } - // Compute restores required at MBB: - CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; - - // Postprocess restore placements at MBB. - // Remove the CSRs that are restored in the return blocks. - // Lest this be confusing, note that: - // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. - if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { - if (! CSRSave[EntryBlock].empty()) - CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; - } - placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); - prevRestores[MBB] = CSRRestore[MBB]; - // Remember this block for adding saves to predecessor - // blocks for multi-entry region. - if (placedRestores) - blks.push_back(MBB); - - DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) - dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); - - return placedRestores; -} - -/// placeSpillsAndRestores - place spills and restores of CSRs -/// used in MBBs in minimal regions that contain the uses. -/// -void PEI::placeSpillsAndRestores(MachineFunction &Fn) { - CSRegBlockMap prevCSRSave; - CSRegBlockMap prevCSRRestore; - SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks; - bool changed = true; - unsigned iterations = 0; - - // Iterate computation of spill and restore placements in the MCFG until: - // 1. CSR use info has been fully propagated around the MCFG, and - // 2. computation of CSRSave[], CSRRestore[] reach fixed points. - while (changed) { - changed = false; - ++iterations; - - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << "iter " << iterations - << " --------------------------------------------------\n"); - - // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, - // which determines the placements of spills and restores. - // Keep track of changes to spills, restores in each iteration to - // minimize the total iterations. - bool SRChanged = false; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - - // Place spills for CSRs in MBB. - SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); - - // Place restores for CSRs in MBB. - SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); - } - - // Add uses of CSRs used inside loops where needed. - changed |= addUsesForTopLevelLoops(cvBlocks); - - // Add uses for CSRs spilled or restored at branch, join points. - if (changed || SRChanged) { - while (! cvBlocks.empty()) { - MachineBasicBlock* MBB = cvBlocks.pop_back_val(); - changed |= addUsesForMEMERegion(MBB, ncvBlocks); - } - if (! ncvBlocks.empty()) { - cvBlocks = ncvBlocks; - ncvBlocks.clear(); - } - } - - if (changed) { - calculateAnticAvail(Fn); - CSRSave.clear(); - CSRRestore.clear(); - } - } - - // Check for effectiveness: - // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} - // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] - // Gives a measure of how many CSR spills have been moved from EntryBlock - // to minimal regions enclosing their uses. - CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); - unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); - numSRReduced += numSRReducedThisFunc; - DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { - dbgs() << "-----------------------------------------------------------\n"; - dbgs() << "total iterations = " << iterations << " ( " - << Fn.getName() - << " " << numSRReducedThisFunc - << " " << Fn.size() - << " )\n"; - dbgs() << "-----------------------------------------------------------\n"; - dumpSRSets(); - dbgs() << "-----------------------------------------------------------\n"; - if (numSRReducedThisFunc) - verifySpillRestorePlacement(); - }); -} - -// Debugging methods. -#ifndef NDEBUG -/// findFastExitPath - debugging method used to detect functions -/// with at least one path from the entry block to a return block -/// directly or which has a very small number of edges. -/// -void PEI::findFastExitPath() { - if (! EntryBlock) - return; - // Fina a path from EntryBlock to any return block that does not branch: - // Entry - // | ... - // v | - // B1<-----+ - // | - // v - // Return - for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), - SE = EntryBlock->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - - // Assume positive, disprove existence of fast path. - HasFastExitPath = true; - - // Check the immediate successors. - if (isReturnBlock(SUCC)) { - if (ShrinkWrapDebugging >= BasicInfo) - dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) - << "->" << getBasicBlockName(SUCC) << "\n"; - break; - } - // Traverse df from SUCC, look for a branch block. - std::string exitPath = getBasicBlockName(SUCC); - for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC), - BE = df_end(SUCC); BI != BE; ++BI) { - MachineBasicBlock* SBB = *BI; - // Reject paths with branch nodes. - if (SBB->succ_size() > 1) { - HasFastExitPath = false; - break; - } - exitPath += "->" + getBasicBlockName(SBB); - } - if (HasFastExitPath) { - if (ShrinkWrapDebugging >= BasicInfo) - dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) - << "->" << exitPath << "\n"; - break; - } - } -} - -/// verifySpillRestorePlacement - check the current spill/restore -/// sets for safety. Attempt to find spills without restores or -/// restores without spills. -/// Spills: walk df from each MBB in spill set ensuring that -/// all CSRs spilled at MMBB are restored on all paths -/// from MBB to all exit blocks. -/// Restores: walk idf from each MBB in restore set ensuring that -/// all CSRs restored at MBB are spilled on all paths -/// reaching MBB. -/// -void PEI::verifySpillRestorePlacement() { - unsigned numReturnBlocks = 0; - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - if (isReturnBlock(MBB) || MBB->succ_size() == 0) - ++numReturnBlocks; - } - for (CSRegBlockMap::iterator BI = CSRSave.begin(), - BE = CSRSave.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet spilled = BI->second; - CSRegSet restored; - - if (spilled.empty()) - continue; - - DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(spilled) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); - - if (CSRRestore[MBB].intersects(spilled)) { - restored |= (CSRRestore[MBB] & spilled); - } - - // Walk depth first from MBB to find restores of all CSRs spilled at MBB: - // we must find restores for all spills w/no intervening spills on all - // paths from MBB to all return blocks. - for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB), - BE = df_end(MBB); BI != BE; ++BI) { - MachineBasicBlock* SBB = *BI; - if (SBB == MBB) - continue; - // Stop when we encounter spills of any CSRs spilled at MBB that - // have not yet been seen to be restored. - if (CSRSave[SBB].intersects(spilled) && - !restored.contains(CSRSave[SBB] & spilled)) - break; - // Collect the CSRs spilled at MBB that are restored - // at this DF successor of MBB. - if (CSRRestore[SBB].intersects(spilled)) - restored |= (CSRRestore[SBB] & spilled); - // If we are at a retun block, check that the restores - // we have seen so far exhaust the spills at MBB, then - // reset the restores. - if (isReturnBlock(SBB) || SBB->succ_size() == 0) { - if (restored != spilled) { - CSRegSet notRestored = (spilled - restored); - DEBUG(dbgs() << MF->getName() << ": " - << stringifyCSRegSet(notRestored) - << " spilled at " << getBasicBlockName(MBB) - << " are never restored on path to return " - << getBasicBlockName(SBB) << "\n"); - } - restored.clear(); - } - } - } - - // Check restore placements. - for (CSRegBlockMap::iterator BI = CSRRestore.begin(), - BE = CSRRestore.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet restored = BI->second; - CSRegSet spilled; - - if (restored.empty()) - continue; - - DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(restored) << "\n"); - - if (CSRSave[MBB].intersects(restored)) { - spilled |= (CSRSave[MBB] & restored); - } - // Walk inverse depth first from MBB to find spills of all - // CSRs restored at MBB: - for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB), - BE = idf_end(MBB); BI != BE; ++BI) { - MachineBasicBlock* PBB = *BI; - if (PBB == MBB) - continue; - // Stop when we encounter restores of any CSRs restored at MBB that - // have not yet been seen to be spilled. - if (CSRRestore[PBB].intersects(restored) && - !spilled.contains(CSRRestore[PBB] & restored)) - break; - // Collect the CSRs restored at MBB that are spilled - // at this DF predecessor of MBB. - if (CSRSave[PBB].intersects(restored)) - spilled |= (CSRSave[PBB] & restored); - } - if (spilled != restored) { - CSRegSet notSpilled = (restored - spilled); - DEBUG(dbgs() << MF->getName() << ": " - << stringifyCSRegSet(notSpilled) - << " restored at " << getBasicBlockName(MBB) - << " are never spilled\n"); - } - } -} - -// Debugging print methods. -std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { - if (!MBB) - return ""; - - if (MBB->getBasicBlock()) - return MBB->getBasicBlock()->getName().str(); - - std::ostringstream name; - name << "_MBB_" << MBB->getNumber(); - return name.str(); -} - -std::string PEI::stringifyCSRegSet(const CSRegSet& s) { - const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); - const std::vector<CalleeSavedInfo> CSI = - MF->getFrameInfo()->getCalleeSavedInfo(); - - std::ostringstream srep; - if (CSI.size() == 0) { - srep << "[]"; - return srep.str(); - } - srep << "["; - CSRegSet::iterator I = s.begin(), E = s.end(); - if (I != E) { - unsigned reg = CSI[*I].getReg(); - srep << TRI->getName(reg); - for (++I; I != E; ++I) { - reg = CSI[*I].getReg(); - srep << ","; - srep << TRI->getName(reg); - } - } - srep << "]"; - return srep.str(); -} - -void PEI::dumpSet(const CSRegSet& s) { - DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); -} - -void PEI::dumpUsed(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; - }); -} - -void PEI::dumpAllUsed() { - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpUsed(MBB); - } -} - -void PEI::dumpSets(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << "\n"; - }); -} - -void PEI::dumpSets1(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << " | " - << stringifyCSRegSet(CSRSave[MBB]) << " | " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - }); -} - -void PEI::dumpAllSets() { - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets1(MBB); - } -} - -void PEI::dumpSRSets() { - DEBUG({ - for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); - MBB != E; ++MBB) { - if (!CSRSave[MBB].empty()) { - dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]); - if (CSRRestore[MBB].empty()) - dbgs() << '\n'; - } - - if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) - dbgs() << " " - << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - } - }); -} -#endif diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 3903743..da2e710 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -42,48 +42,47 @@ STATISTIC(NumInvokes, "Number of invokes replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { - class SjLjEHPrepare : public FunctionPass { - const TargetLoweringBase *TLI; - Type *FunctionContextTy; - Constant *RegisterFn; - Constant *UnregisterFn; - Constant *BuiltinSetjmpFn; - Constant *FrameAddrFn; - Constant *StackAddrFn; - Constant *StackRestoreFn; - Constant *LSDAAddrFn; - Value *PersonalityFn; - Constant *CallSiteFn; - Constant *FuncCtxFn; - AllocaInst *FuncCtx; - public: - static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL) - : FunctionPass(ID), TLI(tli) { } - bool doInitialization(Module &M); - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const {} - const char *getPassName() const { - return "SJLJ Exception Handling preparation"; - } +class SjLjEHPrepare : public FunctionPass { + const TargetMachine *TM; + Type *FunctionContextTy; + Constant *RegisterFn; + Constant *UnregisterFn; + Constant *BuiltinSetjmpFn; + Constant *FrameAddrFn; + Constant *StackAddrFn; + Constant *StackRestoreFn; + Constant *LSDAAddrFn; + Value *PersonalityFn; + Constant *CallSiteFn; + Constant *FuncCtxFn; + AllocaInst *FuncCtx; + +public: + static char ID; // Pass identification, replacement for typeid + explicit SjLjEHPrepare(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {} + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const {} + const char *getPassName() const { + return "SJLJ Exception Handling preparation"; + } - private: - bool setupEntryBlockAndCallSites(Function &F); - void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, - Value *SelVal); - Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads); - void lowerIncomingArguments(Function &F); - void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes); - void insertCallSiteStore(Instruction *I, int Number); - }; +private: + bool setupEntryBlockAndCallSites(Function &F); + void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal); + Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads); + void lowerIncomingArguments(Function &F); + void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes); + void insertCallSiteStore(Instruction *I, int Number); +}; } // end anonymous namespace char SjLjEHPrepare::ID = 0; // Public Interface To the SjLjEHPrepare pass. -FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) { - return new SjLjEHPrepare(TLI); +FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) { + return new SjLjEHPrepare(TM); } // doInitialization - Set up decalarations and types needed to process // exceptions. @@ -92,23 +91,19 @@ bool SjLjEHPrepare::doInitialization(Module &M) { // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); Type *Int32Ty = Type::getInt32Ty(M.getContext()); - FunctionContextTy = - StructType::get(VoidPtrTy, // __prev - Int32Ty, // call_site - ArrayType::get(Int32Ty, 4), // __data - VoidPtrTy, // __personality - VoidPtrTy, // __lsda - ArrayType::get(VoidPtrTy, 5), // __jbuf - NULL); - RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register", - Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), - (Type *)0); - UnregisterFn = - M.getOrInsertFunction("_Unwind_SjLj_Unregister", - Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), - (Type *)0); + FunctionContextTy = StructType::get(VoidPtrTy, // __prev + Int32Ty, // call_site + ArrayType::get(Int32Ty, 4), // __data + VoidPtrTy, // __personality + VoidPtrTy, // __lsda + ArrayType::get(VoidPtrTy, 5), // __jbuf + NULL); + RegisterFn = M.getOrInsertFunction( + "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), (Type *)0); + UnregisterFn = M.getOrInsertFunction( + "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), (Type *)0); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); @@ -134,16 +129,17 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site"); // Insert a store of the call-site number - ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()), - Number); - Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/); + ConstantInt *CallSiteNoC = + ConstantInt::get(Type::getInt32Ty(I->getContext()), Number); + Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/); } /// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until /// we reach blocks we've already seen. static void MarkBlocksLiveIn(BasicBlock *BB, - SmallPtrSet<BasicBlock*, 64> &LiveBBs) { - if (!LiveBBs.insert(BB)) return; // already been here. + SmallPtrSet<BasicBlock *, 64> &LiveBBs) { + if (!LiveBBs.insert(BB)) + return; // already been here. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) MarkBlocksLiveIn(*PI, LiveBBs); @@ -153,12 +149,14 @@ static void MarkBlocksLiveIn(BasicBlock *BB, /// instruction with those returned by the personality function. void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal) { - SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); + SmallVector<Value *, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); while (!UseWorkList.empty()) { Value *Val = UseWorkList.pop_back_val(); ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val); - if (!EVI) continue; - if (EVI->getNumIndices() != 1) continue; + if (!EVI) + continue; + if (EVI->getNumIndices() != 1) + continue; if (*EVI->idx_begin() == 0) EVI->replaceAllUsesWith(ExnVal); else if (*EVI->idx_begin() == 1) @@ -167,14 +165,15 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, EVI->eraseFromParent(); } - if (LPI->getNumUses() == 0) return; + if (LPI->getNumUses() == 0) + return; // There are still some uses of LPI. Construct an aggregate with the exception // values and replace the LPI with that aggregate. Type *LPadType = LPI->getType(); Value *LPadVal = UndefValue::get(LPadType); - IRBuilder<> - Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + IRBuilder<> Builder( + llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); @@ -183,17 +182,18 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, /// setupFunctionContext - Allocate the function context on the stack and fill /// it with all of the data that we know at this point. -Value *SjLjEHPrepare:: -setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { +Value *SjLjEHPrepare::setupFunctionContext(Function &F, + ArrayRef<LandingPadInst *> LPads) { BasicBlock *EntryBB = F.begin(); // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. + const TargetLowering *TLI = TM->getTargetLowering(); unsigned Align = - TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); - FuncCtx = - new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); + TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); + FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context", + EntryBB->begin()); // Fill in the function context structure. for (unsigned I = 0, E = LPads.size(); I != E; ++I) { @@ -204,13 +204,13 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. - Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0, - "exception_gep"); + Value *ExceptionAddr = + Builder.CreateConstGEP2_32(FCData, 0, 0, "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1, - "exn_selector_gep"); + Value *SelectorAddr = + Builder.CreateConstGEP2_32(FCData, 0, 1, "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); @@ -220,9 +220,11 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, - "pers_fn_gep"); - Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true); + Value *PersonalityFieldPtr = + Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep"); + Builder.CreateStore( + Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()), + PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); @@ -242,8 +244,8 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) ++AfterAllocaInsPt; - for (Function::arg_iterator - AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; + ++AI) { Type *Ty = AI->getType(); // Aggregate types can't be cast, but are legal argument types, so we have @@ -262,9 +264,8 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { // This is always a no-op cast because we're casting AI to AI->getType() // so src and destination types are identical. BitCast is the only // possibility. - CastInst *NC = - new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", - AfterAllocaInsPt); + CastInst *NC = new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", + AfterAllocaInsPt); AI->replaceAllUsesWith(NC); // Set the operand of the cast instruction back to the AllocaInst. @@ -281,20 +282,21 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { /// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, - ArrayRef<InvokeInst*> Invokes) { + ArrayRef<InvokeInst *> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. - for (Function::iterator - BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { - for (BasicBlock::iterator - II = BB->begin(), IIE = BB->end(); II != IIE; ++II) { + for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { + for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE; + ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; - if (Inst->use_empty()) continue; + if (Inst->use_empty()) + continue; if (Inst->hasOneUse() && cast<Instruction>(Inst->use_back())->getParent() == BB && - !isa<PHINode>(Inst->use_back())) continue; + !isa<PHINode>(Inst->use_back())) + continue; // If this is an alloca in the entry block, it's not a real register // value. @@ -303,16 +305,16 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, continue; // Avoid iterator invalidation by copying users to a temporary vector. - SmallVector<Instruction*, 16> Users; - for (Value::use_iterator - UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { + SmallVector<Instruction *, 16> Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); if (User->getParent() != BB || isa<PHINode>(User)) Users.push_back(User); } // Find all of the blocks that this value is live in. - SmallPtrSet<BasicBlock*, 64> LiveBBs; + SmallPtrSet<BasicBlock *, 64> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); @@ -336,7 +338,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " - << UnwindBlock->getName() << "\n"); + << UnwindBlock->getName() << "\n"); NeedsSpill = true; break; } @@ -359,15 +361,16 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); // Place PHIs into a set to avoid invalidating the iterator. - SmallPtrSet<PHINode*, 8> PHIsToDemote; - for (BasicBlock::iterator - PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) + SmallPtrSet<PHINode *, 8> PHIsToDemote; + for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) PHIsToDemote.insert(cast<PHINode>(PN)); - if (PHIsToDemote.empty()) continue; + if (PHIsToDemote.empty()) + continue; // Demote the PHIs to the stack. - for (SmallPtrSet<PHINode*, 8>::iterator - I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I) + for (SmallPtrSet<PHINode *, 8>::iterator I = PHIsToDemote.begin(), + E = PHIsToDemote.end(); + I != E; ++I) DemotePHIToStack(*I); // Move the landingpad instruction back to the top of the landing pad block. @@ -379,9 +382,9 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { - SmallVector<ReturnInst*, 16> Returns; - SmallVector<InvokeInst*, 16> Invokes; - SmallSetVector<LandingPadInst*, 16> LPads; + SmallVector<ReturnInst *, 16> Returns; + SmallVector<InvokeInst *, 16> Invokes; + SmallSetVector<LandingPadInst *, 16> LPads; // Look through the terminators of the basic blocks to find invokes. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -401,7 +404,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Returns.push_back(RI); } - if (Invokes.empty()) return false; + if (Invokes.empty()) + return false; NumInvokes += Invokes.size(); @@ -409,7 +413,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { lowerAcrossUnwindEdges(F, Invokes); Value *FuncCtx = - setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); + setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); IRBuilder<> Builder(EntryBB->getTerminator()); @@ -443,7 +447,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { insertCallSiteStore(Invokes[I], I + 1); ConstantInt *CallSiteNum = - ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); + ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); // Record the call site value for the back end so it stays associated with // the invoke. @@ -465,8 +469,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { } // Register the function context and make sure it's known to not throw - CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "", - EntryBB->getTerminator()); + CallInst *Register = + CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator()); Register->setDoesNotThrow(); // Following any allocas not in the entry block, update the saved SP in the diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp index c5bbba3..10a93b7 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -31,8 +31,8 @@ #include "SpillPlacement.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/EdgeBundles.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" @@ -53,11 +53,16 @@ char &llvm::SpillPlacementID = SpillPlacement::ID; void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequiredTransitive<EdgeBundles>(); AU.addRequiredTransitive<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } +/// Decision threshold. A node gets the output value 0 if the weighted sum of +/// its inputs falls in the open interval (-Threshold;Threshold). +static const BlockFrequency Threshold = 2; + /// Node - Each edge bundle corresponds to a Hopfield node. /// /// The node contains precomputed frequency data that only depends on the CFG, @@ -68,31 +73,25 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { /// because all weights are positive. /// struct SpillPlacement::Node { - /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle. - /// Ideally, these two numbers should be identical, but inaccuracies in the - /// block frequency estimates means that we need to normalize ingoing and - /// outgoing frequencies separately so they are commensurate. - float Scale[2]; - - /// Bias - Normalized contributions from non-transparent blocks. - /// A bundle connected to a MustSpill block has a huge negative bias, - /// otherwise it is a number in the range [-2;2]. - float Bias; + /// BiasN - Sum of blocks that prefer a spill. + BlockFrequency BiasN; + /// BiasP - Sum of blocks that prefer a register. + BlockFrequency BiasP; /// Value - Output value of this node computed from the Bias and links. - /// This is always in the range [-1;1]. A positive number means the variable - /// should go in a register through this bundle. - float Value; + /// This is always on of the values {-1, 0, 1}. A positive number means the + /// variable should go in a register through this bundle. + int Value; - typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector; + typedef SmallVector<std::pair<BlockFrequency, unsigned>, 4> LinkVector; /// Links - (Weight, BundleNo) for all transparent blocks connecting to other - /// bundles. The weights are all positive and add up to at most 2, weights - /// from ingoing and outgoing nodes separately add up to a most 1. The weight - /// sum can be less than 2 when the variable is not live into / out of some - /// connected basic blocks. + /// bundles. The weights are all positive block frequencies. LinkVector Links; + /// SumLinkWeights - Cached sum of the weights of all links + ThresHold. + BlockFrequency SumLinkWeights; + /// preferReg - Return true when this node prefers to be in a register. bool preferReg() const { // Undecided nodes (Value==0) go on the stack. @@ -101,28 +100,24 @@ struct SpillPlacement::Node { /// mustSpill - Return True if this node is so biased that it must spill. bool mustSpill() const { - // Actually, we must spill if Bias < sum(weights). - // It may be worth it to compute the weight sum here? - return Bias < -2.0f; - } - - /// Node - Create a blank Node. - Node() { - Scale[0] = Scale[1] = 0; + // We must spill if Bias < -sum(weights) or the MustSpill flag was set. + // BiasN is saturated when MustSpill is set, make sure this still returns + // true when the RHS saturates. Note that SumLinkWeights includes Threshold. + return BiasN >= BiasP + SumLinkWeights; } /// clear - Reset per-query data, but preserve frequencies that only depend on // the CFG. void clear() { - Bias = Value = 0; + BiasN = BiasP = Value = 0; + SumLinkWeights = Threshold; Links.clear(); } /// addLink - Add a link to bundle b with weight w. - /// out=0 for an ingoing link, and 1 for an outgoing link. - void addLink(unsigned b, float w, bool out) { - // Normalize w relative to all connected blocks from that direction. - w *= Scale[out]; + void addLink(unsigned b, BlockFrequency w) { + // Update cached sum. + SumLinkWeights += w; // There can be multiple links to the same bundle, add them up. for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) @@ -134,33 +129,48 @@ struct SpillPlacement::Node { Links.push_back(std::make_pair(w, b)); } - /// addBias - Bias this node from an ingoing[0] or outgoing[1] link. - /// Return the change to the total number of positive biases. - void addBias(float w, bool out) { - // Normalize w relative to all connected blocks from that direction. - w *= Scale[out]; - Bias += w; + /// addBias - Bias this node. + void addBias(BlockFrequency freq, BorderConstraint direction) { + switch (direction) { + default: + break; + case PrefReg: + BiasP += freq; + break; + case PrefSpill: + BiasN += freq; + break; + case MustSpill: + BiasN = BlockFrequency::getMaxFrequency(); + break; + } } /// update - Recompute Value from Bias and Links. Return true when node /// preference changes. bool update(const Node nodes[]) { // Compute the weighted sum of inputs. - float Sum = Bias; - for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) - Sum += I->first * nodes[I->second].Value; + BlockFrequency SumN = BiasN; + BlockFrequency SumP = BiasP; + for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) { + if (nodes[I->second].Value == -1) + SumN += I->first; + else if (nodes[I->second].Value == 1) + SumP += I->first; + } - // The weighted sum is going to be in the range [-2;2]. Ideally, we should - // simply set Value = sign(Sum), but we will add a dead zone around 0 for - // two reasons: + // Each weighted sum is going to be less than the total frequency of the + // bundle. Ideally, we should simply set Value = sign(SumP - SumN), but we + // will add a dead zone around 0 for two reasons: + // // 1. It avoids arbitrary bias when all links are 0 as is possible during // initial iterations. // 2. It helps tame rounding errors when the links nominally sum to 0. - const float Thres = 1e-4f; + // bool Before = preferReg(); - if (Sum < -Thres) + if (SumN >= SumP + Threshold) Value = -1; - else if (Sum > Thres) + else if (SumP >= SumN + Threshold) Value = 1; else Value = 0; @@ -177,22 +187,13 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { nodes = new Node[bundles->getNumBundles()]; // Compute total ingoing and outgoing block frequencies for all bundles. - BlockFrequency.resize(mf.getNumBlockIDs()); + BlockFrequencies.resize(mf.getNumBlockIDs()); + MachineBlockFrequencyInfo &MBFI = getAnalysis<MachineBlockFrequencyInfo>(); for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { - float Freq = LiveIntervals::getSpillWeight(true, false, - loops->getLoopDepth(I)); unsigned Num = I->getNumber(); - BlockFrequency[Num] = Freq; - nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq; - nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq; + BlockFrequencies[Num] = MBFI.getBlockFreq(I); } - // Scales are reciprocal frequencies. - for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i) - for (unsigned d = 0; d != 2; ++d) - if (nodes[i].Scale[d] > 0) - nodes[i].Scale[d] = 1 / nodes[i].Scale[d]; - // We never change the function. return false; } @@ -213,12 +214,15 @@ void SpillPlacement::activate(unsigned n) { // landing pads, or loops with many 'continue' statements. It is difficult to // allocate registers when so many different blocks are involved. // - // Give a small negative bias to large bundles such that 1/32 of the - // connected blocks need to be interested before we consider expanding the - // region through the bundle. This helps compile time by limiting the number - // of blocks visited and the number of links in the Hopfield network. - if (bundles->getBlocks(n).size() > 100) - nodes[n].Bias = -0.0625f; + // Give a small negative bias to large bundles such that a substantial + // fraction of the connected blocks need to be interested before we consider + // expanding the region through the bundle. This helps compile time by + // limiting the number of blocks visited and the number of links in the + // Hopfield network. + if (bundles->getBlocks(n).size() > 100) { + nodes[n].BiasP = 0; + nodes[n].BiasN = (BlockFrequency::getEntryFrequency() / 16); + } } @@ -227,27 +231,20 @@ void SpillPlacement::activate(unsigned n) { void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(), E = LiveBlocks.end(); I != E; ++I) { - float Freq = getBlockFrequency(I->Number); - const float Bias[] = { - 0, // DontCare, - 1, // PrefReg, - -1, // PrefSpill - 0, // PrefBoth - -HUGE_VALF // MustSpill - }; + BlockFrequency Freq = BlockFrequencies[I->Number]; // Live-in to block? if (I->Entry != DontCare) { unsigned ib = bundles->getBundle(I->Number, 0); activate(ib); - nodes[ib].addBias(Freq * Bias[I->Entry], 1); + nodes[ib].addBias(Freq, I->Entry); } // Live-out from block? if (I->Exit != DontCare) { unsigned ob = bundles->getBundle(I->Number, 1); activate(ob); - nodes[ob].addBias(Freq * Bias[I->Exit], 0); + nodes[ob].addBias(Freq, I->Exit); } } } @@ -256,15 +253,15 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) { for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) { - float Freq = getBlockFrequency(*I); + BlockFrequency Freq = BlockFrequencies[*I]; if (Strong) Freq += Freq; unsigned ib = bundles->getBundle(*I, 0); unsigned ob = bundles->getBundle(*I, 1); activate(ib); activate(ob); - nodes[ib].addBias(-Freq, 1); - nodes[ob].addBias(-Freq, 0); + nodes[ib].addBias(Freq, PrefSpill); + nodes[ob].addBias(Freq, PrefSpill); } } @@ -284,9 +281,9 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { Linked.push_back(ib); if (nodes[ob].Links.empty() && !nodes[ob].mustSpill()) Linked.push_back(ob); - float Freq = getBlockFrequency(Number); - nodes[ib].addLink(ob, Freq, 1); - nodes[ob].addLink(ib, Freq, 0); + BlockFrequency Freq = BlockFrequencies[Number]; + nodes[ib].addLink(ob, Freq); + nodes[ob].addLink(ib, Freq); } } diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h index fc412f8..105516b 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.h +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h @@ -30,6 +30,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/BlockFrequency.h" namespace llvm { @@ -57,7 +58,7 @@ class SpillPlacement : public MachineFunctionPass { SmallVector<unsigned, 8> RecentPositive; // Block frequencies are computed once. Indexed by block number. - SmallVector<float, 4> BlockFrequency; + SmallVector<BlockFrequency, 4> BlockFrequencies; public: static char ID; // Pass identification, replacement for typeid. @@ -139,8 +140,8 @@ public: /// getBlockFrequency - Return the estimated block execution frequency per /// function invocation. - float getBlockFrequency(unsigned Number) const { - return BlockFrequency[Number]; + BlockFrequency getBlockFrequency(unsigned Number) const { + return BlockFrequencies[Number]; } private: diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp index 209792f..d5b3a4a 100644 --- a/contrib/llvm/lib/CodeGen/Spiller.cpp +++ b/contrib/llvm/lib/CodeGen/Spiller.cpp @@ -77,7 +77,7 @@ protected: DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); - assert(li->weight != HUGE_VALF && + assert(li->weight != llvm::huge_valf && "Attempting to spill already spilled value."); assert(!TargetRegisterInfo::isStackSlot(li->reg) && @@ -115,15 +115,14 @@ protected: indices.push_back(i); } - // Create a new vreg & interval for this instr. - LiveInterval *newLI = &LRE.create(); - newLI->weight = HUGE_VALF; + // Create a new virtual register for the load and/or store. + unsigned NewVReg = LRE.create(); // Update the reg operands & kill flags. for (unsigned i = 0; i < indices.size(); ++i) { unsigned mopIdx = indices[i]; MachineOperand &mop = mi->getOperand(mopIdx); - mop.setReg(newLI->reg); + mop.setReg(NewVReg); if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) { mop.setIsKill(true); } @@ -133,28 +132,20 @@ protected: // Insert reload if necessary. MachineBasicBlock::iterator miItr(mi); if (hasUse) { - tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc, + MachineInstrSpan MIS(miItr); + + tii->loadRegFromStackSlot(*mi->getParent(), miItr, NewVReg, ss, trc, tri); - MachineInstr *loadInstr(prior(miItr)); - SlotIndex loadIndex = - lis->InsertMachineInstrInMaps(loadInstr).getRegSlot(); - SlotIndex endIndex = loadIndex.getNextIndex(); - VNInfo *loadVNI = - newLI->getNextValue(loadIndex, lis->getVNInfoAllocator()); - newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); + lis->InsertMachineInstrRangeInMaps(MIS.begin(), miItr); } // Insert store if necessary. if (hasDef) { - tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg, + MachineInstrSpan MIS(miItr); + + tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), NewVReg, true, ss, trc, tri); - MachineInstr *storeInstr(llvm::next(miItr)); - SlotIndex storeIndex = - lis->InsertMachineInstrInMaps(storeInstr).getRegSlot(); - SlotIndex beginIndex = storeIndex.getPrevIndex(); - VNInfo *storeVNI = - newLI->getNextValue(beginIndex, lis->getVNInfoAllocator()); - newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); + lis->InsertMachineInstrRangeInMaps(llvm::next(miItr), MIS.end()); } } } diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 0a3818e..68a15f7 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -214,7 +214,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { // When not live in, the first use should be a def. if (!BI.LiveIn) { - assert(LVI->start == LVI->valno->def && "Dangling LiveRange start"); + assert(LVI->start == LVI->valno->def && "Dangling Segment start"); assert(LVI->start == BI.FirstInstr && "First instr should be a def"); BI.FirstDef = BI.FirstInstr; } @@ -245,8 +245,8 @@ bool SplitAnalysis::calcLiveBlockInfo() { BI.FirstInstr = BI.FirstDef = LVI->start; } - // A LiveRange that starts in the middle of the block must be a def. - assert(LVI->start == LVI->valno->def && "Dangling LiveRange start"); + // A Segment that starts in the middle of the block must be a def. + assert(LVI->start == LVI->valno->def && "Dangling Segment start"); if (!BI.FirstDef) BI.FirstDef = LVI->start; } @@ -325,12 +325,14 @@ void SplitAnalysis::analyze(const LiveInterval *li) { SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, - MachineDominatorTree &mdt) + MachineDominatorTree &mdt, + MachineBlockFrequencyInfo &mbfi) : SA(sa), LIS(lis), VRM(vrm), MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt), TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), + MBFI(mbfi), Edit(0), OpenIdx(0), SpillMode(SM_Partition), @@ -375,7 +377,7 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); - LiveInterval *LI = Edit->get(RegIdx); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); // Create a new value. VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator()); @@ -393,14 +395,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, // If the previous value was a simple mapping, add liveness for it now. if (VNInfo *OldVNI = InsP.first->second.getPointer()) { SlotIndex Def = OldVNI->def; - LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI)); + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI)); // No longer a simple mapping. Switch to a complex, non-forced mapping. InsP.first->second = ValueForcePair(); } // This is a complex mapping, add liveness for VNI SlotIndex Def = VNI->def; - LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } @@ -420,7 +422,8 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. SlotIndex Def = VNI->def; - Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); // Mark as complex mapped, forced. VFP = ValueForcePair(0, true); } @@ -432,7 +435,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, MachineBasicBlock::iterator I) { MachineInstr *CopyMI = 0; SlotIndex Def; - LiveInterval *LI = Edit->get(RegIdx); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); // We may be trying to avoid interference that ends at a deleted instruction, // so always begin RegIdx 0 early and all others late. @@ -460,11 +463,11 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, unsigned SplitEditor::openIntv() { // Create the complement as index 0. if (Edit->empty()) - Edit->create(); + Edit->createEmptyInterval(); // Create the open interval. OpenIdx = Edit->size(); - Edit->create(); + Edit->createEmptyInterval(); return OpenIdx; } @@ -629,7 +632,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { //===----------------------------------------------------------------------===// void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { - LiveInterval *LI = Edit->get(0); + LiveInterval *LI = &LIS.getInterval(Edit->get(0)); DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n"); RegAssignMap::iterator AssignI; AssignI.setMap(RegAssign); @@ -728,7 +731,7 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, void SplitEditor::hoistCopiesForSize() { // Get the complement interval, always RegIdx 0. - LiveInterval *LI = Edit->get(0); + LiveInterval *LI = &LIS.getInterval(Edit->get(0)); LiveInterval *Parent = &Edit->getParent(); // Track the nearest common dominator for all back-copies for each ParentVNI, @@ -859,13 +862,13 @@ bool SplitEditor::transferValues() { // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); - LiveInterval *LI = Edit->get(RegIdx); + LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); // Check for a simply defined value that can be blitted directly. ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id)); if (VNInfo *VNI = VFP.getPointer()) { DEBUG(dbgs() << ':' << VNI->id); - LI->addRange(LiveRange(Start, End, VNI)); + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); Start = End; continue; } @@ -889,7 +892,7 @@ bool SplitEditor::transferValues() { // The first block may be live-in, or it may have its own def. if (Start != BlockStart) { - VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped value"); DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); // MBB has its own def. Is it also live-out? @@ -909,7 +912,7 @@ bool SplitEditor::transferValues() { if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); - VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped parent PHI"); if (End >= BlockEnd) LRC.setLiveOutValue(MBB, VNI); // Live-out as well. @@ -917,10 +920,10 @@ bool SplitEditor::transferValues() { // This block needs a live-in value. The last block covered may not // be live-out. if (End < BlockEnd) - LRC.addLiveInBlock(LI, MDT[MBB], End); + LRC.addLiveInBlock(LR, MDT[MBB], End); else { // Live-through, and we don't know the value. - LRC.addLiveInBlock(LI, MDT[MBB]); + LRC.addLiveInBlock(LR, MDT[MBB]); LRC.setLiveOutValue(MBB, 0); } } @@ -947,7 +950,7 @@ void SplitEditor::extendPHIKillRanges() { if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) continue; unsigned RegIdx = RegAssign.lookup(PHIVNI->def); - LiveInterval *LI = Edit->get(RegIdx); + LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); LiveRangeCalc &LRC = getLRCalc(RegIdx); MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), @@ -959,7 +962,7 @@ void SplitEditor::extendPHIKillRanges() { if (Edit->getParent().liveAt(LastUse)) { assert(RegAssign.lookup(LastUse) == RegIdx && "Different register assignment in phi predecessor"); - LRC.extend(LI, End); + LRC.extend(LR, End); } } } @@ -988,7 +991,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); - LiveInterval *LI = Edit->get(RegIdx); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); MO.setReg(LI->reg); DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' << Idx << ':' << RegIdx << '\t' << *MI); @@ -1009,14 +1012,14 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { } else Idx = Idx.getRegSlot(true); - getLRCalc(RegIdx).extend(LI, Idx.getNextSlot()); + getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot()); } } void SplitEditor::deleteRematVictims() { SmallVector<MachineInstr*, 8> Dead; for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){ - LiveInterval *LI = *I; + LiveInterval *LI = &LIS.getInterval(*I); for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); LII != LIE; ++LII) { // Dead defs end at the dead slot. @@ -1089,8 +1092,10 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { deleteRematVictims(); // Get rid of unused values and set phi-kill flags. - for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) - (*I)->RenumberValues(LIS); + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) { + LiveInterval &LI = LIS.getInterval(*I); + LI.RenumberValues(); + } // Provide a reverse mapping from original indices to Edit ranges. if (LRMap) { @@ -1103,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { ConnectedVNInfoEqClasses ConEQ(LIS); for (unsigned i = 0, e = Edit->size(); i != e; ++i) { // Don't use iterators, they are invalidated by create() below. - LiveInterval *li = Edit->get(i); + LiveInterval *li = &LIS.getInterval(Edit->get(i)); unsigned NumComp = ConEQ.Classify(li); if (NumComp <= 1) continue; @@ -1111,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { SmallVector<LiveInterval*, 8> dups; dups.push_back(li); for (unsigned j = 1; j != NumComp; ++j) - dups.push_back(&Edit->create()); + dups.push_back(&Edit->createEmptyInterval()); ConEQ.Distribute(&dups[0], MRI); // The new intervals all map back to i. if (LRMap) @@ -1119,7 +1124,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { } // Calculate spill weight and allocation hints for new intervals. - Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops); + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops, MBFI); assert(!LRMap || LRMap->size() == Edit->size()); } diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index 4005a3d..f029c73 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -27,6 +27,7 @@ class ConnectedVNInfoEqClasses; class LiveInterval; class LiveIntervals; class LiveRangeEdit; +class MachineBlockFrequencyInfo; class MachineInstr; class MachineLoopInfo; class MachineRegisterInfo; @@ -215,6 +216,7 @@ class SplitEditor { MachineDominatorTree &MDT; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; + const MachineBlockFrequencyInfo &MBFI; public: @@ -349,7 +351,7 @@ public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&, - MachineDominatorTree&); + MachineDominatorTree&, MachineBlockFrequencyInfo &); /// reset - Prepare for a new split. void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index a789a25..3dbc050 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -42,6 +42,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/DebugInfo.h" #include "llvm/IR/Function.h" @@ -169,7 +170,7 @@ private: /// slots to use the joint slots. void remapInstructions(DenseMap<int, int> &SlotRemap); - /// The input program may contain intructions which are not inside lifetime + /// The input program may contain instructions which are not inside lifetime /// markers. This can happen due to a bug in the compiler or due to a bug in /// user code (for example, returning a reference to a local variable). /// This procedure checks all of the instructions in the function and @@ -309,9 +310,9 @@ void StackColoring::calculateLocalLiveness() { SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet; - for (SmallVector<const MachineBasicBlock*, 8>::iterator - PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); - PI != PE; ++PI) { + for (SmallVectorImpl<const MachineBasicBlock *>::iterator + PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); + PI != PE; ++PI) { const MachineBasicBlock *BB = *PI; if (!BBSet.count(BB)) continue; @@ -428,17 +429,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { } // Create the interval of the blocks that we previously found to be 'alive'. - BitVector Alive = BlockLiveness[MBB].LiveIn; - Alive |= BlockLiveness[MBB].LiveOut; - - if (Alive.any()) { - for (int pos = Alive.find_first(); pos != -1; - pos = Alive.find_next(pos)) { - if (!Starts[pos].isValid()) - Starts[pos] = Indexes->getMBBStartIdx(MBB); - if (!Finishes[pos].isValid()) - Finishes[pos] = Indexes->getMBBEndIdx(MBB); - } + BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB]; + for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; + pos = MBBLiveness.LiveIn.find_next(pos)) { + Starts[pos] = Indexes->getMBBStartIdx(MBB); + } + for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1; + pos = MBBLiveness.LiveOut.find_next(pos)) { + Finishes[pos] = Indexes->getMBBEndIdx(MBB); } for (unsigned i = 0; i < NumSlots; ++i) { @@ -452,14 +450,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SlotIndex F = Finishes[i]; if (S < F) { // We have a single consecutive region. - Intervals[i]->addRange(LiveRange(S, F, ValNum)); + Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); } else { // We have two non consecutive regions. This happens when // LIFETIME_START appears after the LIFETIME_END marker. SlotIndex NewStart = Indexes->getMBBStartIdx(MBB); SlotIndex NewFin = Indexes->getMBBEndIdx(MBB); - Intervals[i]->addRange(LiveRange(NewStart, F, ValNum)); - Intervals[i]->addRange(LiveRange(S, NewFin, ValNum)); + Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); + Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); } } } @@ -528,6 +526,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { if (!V) continue; + const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V); + if (PSV && PSV->isConstant(MFI)) + continue; + // Climb up and find the original alloca. V = GetUnderlyingObject(V); // If we did not find one, or if the one that we found is not in our @@ -761,7 +763,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Merge disjoint slots. if (!First->overlaps(*Second)) { Changed = true; - First->MergeRangesInAsValue(*Second, First->getValNumInfo(0)); + First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<< diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp new file mode 100644 index 0000000..40893ea --- /dev/null +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -0,0 +1,314 @@ +//===---------------------------- StackMaps.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "stackmaps" + +#include "llvm/CodeGen/StackMaps.h" + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <iterator> + +using namespace llvm; + +PatchPointOpers::PatchPointOpers(const MachineInstr *MI): + MI(MI), + HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + !MI->getOperand(0).isImplicit()), + IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) { + +#ifndef NDEBUG + { + unsigned CheckStartIdx = 0, e = MI->getNumOperands(); + while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() && + MI->getOperand(CheckStartIdx).isDef() && + !MI->getOperand(CheckStartIdx).isImplicit()) + ++CheckStartIdx; + + assert(getMetaIdx() == CheckStartIdx && + "Unexpected additonal definition in Patchpoint intrinsic."); + } +#endif +} + +unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { + if (!StartIdx) + StartIdx = getVarIdx(); + + // Find the next scratch register (implicit def and early clobber) + unsigned ScratchIdx = StartIdx, e = MI->getNumOperands(); + while (ScratchIdx < e && + !(MI->getOperand(ScratchIdx).isReg() && + MI->getOperand(ScratchIdx).isDef() && + MI->getOperand(ScratchIdx).isImplicit() && + MI->getOperand(ScratchIdx).isEarlyClobber())) + ++ScratchIdx; + + assert(ScratchIdx != e && "No scratch register available"); + return ScratchIdx; +} + +void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID, + MachineInstr::const_mop_iterator MOI, + MachineInstr::const_mop_iterator MOE, + bool recordResult) { + + MCContext &OutContext = AP.OutStreamer.getContext(); + MCSymbol *MILabel = OutContext.CreateTempSymbol(); + AP.OutStreamer.EmitLabel(MILabel); + + LocationVec CallsiteLocs; + + if (recordResult) { + std::pair<Location, MachineInstr::const_mop_iterator> ParseResult = + OpParser(MI.operands_begin(), llvm::next(MI.operands_begin()), AP.TM); + + Location &Loc = ParseResult.first; + assert(Loc.LocType == Location::Register && + "Stackmap return location must be a register."); + CallsiteLocs.push_back(Loc); + } + + while (MOI != MOE) { + std::pair<Location, MachineInstr::const_mop_iterator> ParseResult = + OpParser(MOI, MOE, AP.TM); + + Location &Loc = ParseResult.first; + + // Move large constants into the constant pool. + if (Loc.LocType == Location::Constant && (Loc.Offset & ~0xFFFFFFFFULL)) { + Loc.LocType = Location::ConstantIndex; + Loc.Offset = ConstPool.getConstantIndex(Loc.Offset); + } + + CallsiteLocs.push_back(Loc); + MOI = ParseResult.second; + } + + const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub( + MCSymbolRefExpr::Create(MILabel, OutContext), + MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext), + OutContext); + + CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, CallsiteLocs)); +} + +static MachineInstr::const_mop_iterator +getStackMapEndMOP(MachineInstr::const_mop_iterator MOI, + MachineInstr::const_mop_iterator MOE) { + for (; MOI != MOE; ++MOI) + if (MOI->isRegMask() || (MOI->isReg() && MOI->isImplicit())) + break; + + return MOI; +} + +void StackMaps::recordStackMap(const MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::STACKMAP && "exected stackmap"); + + int64_t ID = MI.getOperand(0).getImm(); + assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); + recordStackMapOpers(MI, ID, llvm::next(MI.operands_begin(), 2), + getStackMapEndMOP(MI.operands_begin(), + MI.operands_end())); +} + +void StackMaps::recordPatchPoint(const MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "exected stackmap"); + + PatchPointOpers opers(&MI); + int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm(); + assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); + MachineInstr::const_mop_iterator MOI = + llvm::next(MI.operands_begin(), opers.getStackMapStartIdx()); + recordStackMapOpers(MI, ID, MOI, getStackMapEndMOP(MOI, MI.operands_end()), + opers.isAnyReg() && opers.hasDef()); + +#ifndef NDEBUG + // verify anyregcc + LocationVec &Locations = CSInfos.back().Locations; + if (opers.isAnyReg()) { + unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm(); + for (unsigned i = 0, e = (opers.hasDef() ? NArgs+1 : NArgs); i != e; ++i) + assert(Locations[i].LocType == Location::Register && + "anyreg arg must be in reg."); + } +#endif +} + +/// serializeToStackMapSection conceptually populates the following fields: +/// +/// uint32 : Reserved (header) +/// uint32 : NumConstants +/// int64 : Constants[NumConstants] +/// uint32 : NumRecords +/// StkMapRecord[NumRecords] { +/// uint32 : PatchPoint ID +/// uint32 : Instruction Offset +/// uint16 : Reserved (record flags) +/// uint16 : NumLocations +/// Location[NumLocations] { +/// uint8 : Register | Direct | Indirect | Constant | ConstantIndex +/// uint8 : Size in Bytes +/// uint16 : Dwarf RegNum +/// int32 : Offset +/// } +/// } +/// +/// Location Encoding, Type, Value: +/// 0x1, Register, Reg (value in register) +/// 0x2, Direct, Reg + Offset (frame index) +/// 0x3, Indirect, [Reg + Offset] (spilled value) +/// 0x4, Constant, Offset (small constant) +/// 0x5, ConstIndex, Constants[Offset] (large constant) +/// +void StackMaps::serializeToStackMapSection() { + // Bail out if there's no stack map data. + if (CSInfos.empty()) + return; + + MCContext &OutContext = AP.OutStreamer.getContext(); + const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + + // Create the section. + const MCSection *StackMapSection = + OutContext.getObjectFileInfo()->getStackMapSection(); + AP.OutStreamer.SwitchSection(StackMapSection); + + // Emit a dummy symbol to force section inclusion. + AP.OutStreamer.EmitLabel( + OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps"))); + + // Serialize data. + const char *WSMP = "Stack Maps: "; + (void)WSMP; + const MCRegisterInfo &MCRI = *OutContext.getRegisterInfo(); + + DEBUG(dbgs() << "********** Stack Map Output **********\n"); + + // Header. + AP.OutStreamer.EmitIntValue(0, 4); + + // Num constants. + AP.OutStreamer.EmitIntValue(ConstPool.getNumConstants(), 4); + + // Constant pool entries. + for (unsigned i = 0; i < ConstPool.getNumConstants(); ++i) + AP.OutStreamer.EmitIntValue(ConstPool.getConstant(i), 8); + + DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << "\n"); + AP.OutStreamer.EmitIntValue(CSInfos.size(), 4); + + for (CallsiteInfoList::const_iterator CSII = CSInfos.begin(), + CSIE = CSInfos.end(); + CSII != CSIE; ++CSII) { + + unsigned CallsiteID = CSII->ID; + const LocationVec &CSLocs = CSII->Locations; + + DEBUG(dbgs() << WSMP << "callsite " << CallsiteID << "\n"); + + // Verify stack map entry. It's better to communicate a problem to the + // runtime than crash in case of in-process compilation. Currently, we do + // simple overflow checks, but we may eventually communicate other + // compilation errors this way. + if (CSLocs.size() > UINT16_MAX) { + AP.OutStreamer.EmitIntValue(UINT32_MAX, 4); // Invalid ID. + AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); + AP.OutStreamer.EmitIntValue(0, 2); // Reserved. + AP.OutStreamer.EmitIntValue(0, 2); // 0 locations. + continue; + } + + AP.OutStreamer.EmitIntValue(CallsiteID, 4); + AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); + + // Reserved for flags. + AP.OutStreamer.EmitIntValue(0, 2); + + DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n"); + + AP.OutStreamer.EmitIntValue(CSLocs.size(), 2); + + unsigned operIdx = 0; + for (LocationVec::const_iterator LocI = CSLocs.begin(), LocE = CSLocs.end(); + LocI != LocE; ++LocI, ++operIdx) { + const Location &Loc = *LocI; + DEBUG( + dbgs() << WSMP << " Loc " << operIdx << ": "; + switch (Loc.LocType) { + case Location::Unprocessed: + dbgs() << "<Unprocessed operand>"; + break; + case Location::Register: + dbgs() << "Register " << MCRI.getName(Loc.Reg); + break; + case Location::Direct: + dbgs() << "Direct " << MCRI.getName(Loc.Reg); + if (Loc.Offset) + dbgs() << " + " << Loc.Offset; + break; + case Location::Indirect: + dbgs() << "Indirect " << MCRI.getName(Loc.Reg) + << " + " << Loc.Offset; + break; + case Location::Constant: + dbgs() << "Constant " << Loc.Offset; + break; + case Location::ConstantIndex: + dbgs() << "Constant Index " << Loc.Offset; + break; + } + dbgs() << "\n"; + ); + + unsigned RegNo = 0; + int Offset = Loc.Offset; + if(Loc.Reg) { + RegNo = MCRI.getDwarfRegNum(Loc.Reg, false); + for (MCSuperRegIterator SR(Loc.Reg, TRI); + SR.isValid() && (int)RegNo < 0; ++SR) { + RegNo = TRI->getDwarfRegNum(*SR, false); + } + // If this is a register location, put the subregister byte offset in + // the location offset. + if (Loc.LocType == Location::Register) { + assert(!Loc.Offset && "Register location should have zero offset"); + unsigned LLVMRegNo = MCRI.getLLVMRegNum(RegNo, false); + unsigned SubRegIdx = MCRI.getSubRegIndex(LLVMRegNo, Loc.Reg); + if (SubRegIdx) + Offset = MCRI.getSubRegIdxOffset(SubRegIdx); + } + } + else { + assert(Loc.LocType != Location::Register && + "Missing location register"); + } + AP.OutStreamer.EmitIntValue(Loc.LocType, 1); + AP.OutStreamer.EmitIntValue(Loc.Size, 1); + AP.OutStreamer.EmitIntValue(RegNo, 2); + AP.OutStreamer.EmitIntValue(Offset, 4); + } + } + + AP.OutStreamer.AddBlankLine(); + + CSInfos.clear(); +} diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index fbef347..9020449 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -15,147 +15,120 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "stack-protector" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/Triple.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" +#include <cstdlib> using namespace llvm; STATISTIC(NumFunProtected, "Number of functions protected"); STATISTIC(NumAddrTaken, "Number of local variables that have their address" " taken."); -namespace { - class StackProtector : public FunctionPass { - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// target type sizes. - const TargetLoweringBase *TLI; - - Function *F; - Module *M; - - DominatorTree *DT; - - /// VisitedPHIs - The set of PHI nodes visited when determining - /// if a variable's reference has been taken. This set - /// is maintained to ensure we don't visit the same PHI node multiple - /// times. - SmallPtrSet<const PHINode*, 16> VisitedPHIs; - - /// InsertStackProtectors - Insert code into the prologue and epilogue of - /// the function. - /// - /// - The prologue code loads and stores the stack guard onto the stack. - /// - The epilogue checks the value stored in the prologue against the - /// original value. It calls __stack_chk_fail if they differ. - bool InsertStackProtectors(); - - /// CreateFailBB - Create a basic block to jump to when the stack protector - /// check fails. - BasicBlock *CreateFailBB(); - - /// ContainsProtectableArray - Check whether the type either is an array or - /// contains an array of sufficient size so that we need stack protectors - /// for it. - bool ContainsProtectableArray(Type *Ty, bool Strong = false, - bool InStruct = false) const; - - /// \brief Check whether a stack allocation has its address taken. - bool HasAddressTaken(const Instruction *AI); - - /// RequiresStackProtector - Check whether or not this function needs a - /// stack protector based upon the stack protector level. - bool RequiresStackProtector(); - public: - static char ID; // Pass identification, replacement for typeid. - StackProtector() : FunctionPass(ID), TLI(0) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } - StackProtector(const TargetLoweringBase *tli) - : FunctionPass(ID), TLI(tli) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<DominatorTree>(); - } - - virtual bool runOnFunction(Function &Fn); - }; -} // end anonymous namespace +static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp", + cl::init(true), cl::Hidden); char StackProtector::ID = 0; -INITIALIZE_PASS(StackProtector, "stack-protector", - "Insert stack protectors", false, false) +INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors", + false, true) -FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) { - return new StackProtector(tli); +FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) { + return new StackProtector(TM); +} + +StackProtector::SSPLayoutKind +StackProtector::getSSPLayout(const AllocaInst *AI) const { + return AI ? Layout.lookup(AI) : SSPLK_None; } bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); DT = getAnalysisIfAvailable<DominatorTree>(); + TLI = TM->getTargetLowering(); - if (!RequiresStackProtector()) return false; + if (!RequiresStackProtector()) + return false; + + Attribute Attr = Fn.getAttributes().getAttribute( + AttributeSet::FunctionIndex, "stack-protector-buffer-size"); + if (Attr.isStringAttribute()) + Attr.getValueAsString().getAsInteger(10, SSPBufferSize); ++NumFunProtected; return InsertStackProtectors(); } -/// ContainsProtectableArray - Check whether the type either is an array or -/// contains a char array of sufficient size so that we need stack protectors -/// for it. -bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong, +/// \param [out] IsLarge is set to true if a protectable array is found and +/// it is "large" ( >= ssp-buffer-size). In the case of a structure with +/// multiple arrays, this gets set if any of them is large. +bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, + bool Strong, bool InStruct) const { - if (!Ty) return false; + if (!Ty) + return false; if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { - // In strong mode any array, regardless of type and size, triggers a - // protector - if (Strong) - return true; - const TargetMachine &TM = TLI->getTargetMachine(); if (!AT->getElementType()->isIntegerTy(8)) { - Triple Trip(TM.getTargetTriple()); - // If we're on a non-Darwin platform or we're inside of a structure, don't // add stack protectors unless the array is a character array. - if (InStruct || !Trip.isOSDarwin()) - return false; + // However, in strong mode any array, regardless of type and size, + // triggers a protector. + if (!Strong && (InStruct || !Trip.isOSDarwin())) + return false; } // If an array has more than SSPBufferSize bytes of allocated space, then we // emit stack protectors. - if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) + if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) { + IsLarge = true; + return true; + } + + if (Strong) + // Require a protector for all arrays in strong mode return true; } const StructType *ST = dyn_cast<StructType>(Ty); - if (!ST) return false; + if (!ST) + return false; + bool NeedsProtector = false; for (StructType::element_iterator I = ST->element_begin(), - E = ST->element_end(); I != E; ++I) - if (ContainsProtectableArray(*I, Strong, true)) - return true; + E = ST->element_end(); + I != E; ++I) + if (ContainsProtectableArray(*I, IsLarge, Strong, true)) { + // If the element is a protectable array and is large (>= SSPBufferSize) + // then we are done. If the protectable array is not large, then + // keep looking in case a subsequent element is a large array. + if (IsLarge) + return true; + NeedsProtector = true; + } - return false; + return NeedsProtector; } bool StackProtector::HasAddressTaken(const Instruction *AI) { for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) { + UI != UE; ++UI) { const User *U = *UI; if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (AI == SI->getValueOperand()) @@ -202,11 +175,13 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { /// address taken. bool StackProtector::RequiresStackProtector() { bool Strong = false; + bool NeedsProtector = false; if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq)) - return true; - else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectStrong)) + Attribute::StackProtectReq)) { + NeedsProtector = true; + Strong = true; // Use the same heuristic as strong to determine SSPLayout + } else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectStrong)) Strong = true; else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackProtect)) @@ -215,38 +190,156 @@ bool StackProtector::RequiresStackProtector() { for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; - for (BasicBlock::iterator - II = BB->begin(), IE = BB->end(); II != IE; ++II) { + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; + ++II) { if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (AI->isArrayAllocation()) { // SSP-Strong: Enable protectors for any call to alloca, regardless // of size. if (Strong) return true; - + if (const ConstantInt *CI = - dyn_cast<ConstantInt>(AI->getArraySize())) { - unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize; - if (CI->getLimitedValue(BufferSize) >= BufferSize) + dyn_cast<ConstantInt>(AI->getArraySize())) { + if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) { // A call to alloca with size >= SSPBufferSize requires // stack protectors. - return true; - } else // A call to alloca with a variable size requires protectors. - return true; + Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); + NeedsProtector = true; + } else if (Strong) { + // Require protectors for all alloca calls in strong mode. + Layout.insert(std::make_pair(AI, SSPLK_SmallArray)); + NeedsProtector = true; + } + } else { + // A call to alloca with a variable size requires protectors. + Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); + NeedsProtector = true; + } + continue; } - if (ContainsProtectableArray(AI->getAllocatedType(), Strong)) - return true; + bool IsLarge = false; + if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) { + Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray + : SSPLK_SmallArray)); + NeedsProtector = true; + continue; + } if (Strong && HasAddressTaken(AI)) { - ++NumAddrTaken; - return true; + ++NumAddrTaken; + Layout.insert(std::make_pair(AI, SSPLK_AddrOf)); + NeedsProtector = true; } } } } - return false; + return NeedsProtector; +} + +static bool InstructionWillNotHaveChain(const Instruction *I) { + return !I->mayHaveSideEffects() && !I->mayReadFromMemory() && + isSafeToSpeculativelyExecute(I); +} + +/// Identify if RI has a previous instruction in the "Tail Position" and return +/// it. Otherwise return 0. +/// +/// This is based off of the code in llvm::isInTailCallPosition. The difference +/// is that it inverts the first part of llvm::isInTailCallPosition since +/// isInTailCallPosition is checking if a call is in a tail call position, and +/// we are searching for an unknown tail call that might be in the tail call +/// position. Once we find the call though, the code uses the same refactored +/// code, returnTypeIsEligibleForTailCall. +static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI, + const TargetLoweringBase *TLI) { + // Establish a reasonable upper bound on the maximum amount of instructions we + // will look through to find a tail call. + unsigned SearchCounter = 0; + const unsigned MaxSearch = 4; + bool NoInterposingChain = true; + + for (BasicBlock::reverse_iterator I = llvm::next(BB->rbegin()), + E = BB->rend(); + I != E && SearchCounter < MaxSearch; ++I) { + Instruction *Inst = &*I; + + // Skip over debug intrinsics and do not allow them to affect our MaxSearch + // counter. + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + + // If we find a call and the following conditions are satisifed, then we + // have found a tail call that satisfies at least the target independent + // requirements of a tail call: + // + // 1. The call site has the tail marker. + // + // 2. The call site either will not cause the creation of a chain or if a + // chain is necessary there are no instructions in between the callsite and + // the call which would create an interposing chain. + // + // 3. The return type of the function does not impede tail call + // optimization. + if (CallInst *CI = dyn_cast<CallInst>(Inst)) { + if (CI->isTailCall() && + (InstructionWillNotHaveChain(CI) || NoInterposingChain) && + returnTypeIsEligibleForTailCall(BB->getParent(), CI, RI, *TLI)) + return CI; + } + + // If we did not find a call see if we have an instruction that may create + // an interposing chain. + NoInterposingChain = + NoInterposingChain && InstructionWillNotHaveChain(Inst); + + // Increment max search. + SearchCounter++; + } + + return 0; +} + +/// Insert code into the entry block that stores the __stack_chk_guard +/// variable onto the stack: +/// +/// entry: +/// StackGuardSlot = alloca i8* +/// StackGuard = load __stack_chk_guard +/// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) +/// +/// Returns true if the platform/triple supports the stackprotectorcreate pseudo +/// node. +static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, + const TargetLoweringBase *TLI, const Triple &Trip, + AllocaInst *&AI, Value *&StackGuardVar) { + bool SupportsSelectionDAGSP = false; + PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + unsigned AddressSpace, Offset; + if (TLI->getStackCookieLocation(AddressSpace, Offset)) { + Constant *OffsetVal = + ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); + + StackGuardVar = ConstantExpr::getIntToPtr( + OffsetVal, PointerType::get(PtrTy, AddressSpace)); + } else if (Trip.getOS() == llvm::Triple::OpenBSD) { + StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy); + cast<GlobalValue>(StackGuardVar) + ->setVisibility(GlobalValue::HiddenVisibility); + } else { + SupportsSelectionDAGSP = true; + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + } + + IRBuilder<> B(&F->getEntryBlock().front()); + AI = B.CreateAlloca(PtrTy, 0, "StackGuardSlot"); + LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard"); + B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), LI, + AI); + + return SupportsSelectionDAGSP; } /// InsertStackProtectors - Insert code into the prologue and epilogue of the @@ -256,102 +349,102 @@ bool StackProtector::RequiresStackProtector() { /// - The epilogue checks the value stored in the prologue against the original /// value. It calls __stack_chk_fail if they differ. bool StackProtector::InsertStackProtectors() { - BasicBlock *FailBB = 0; // The basic block to jump to if check fails. - BasicBlock *FailBBDom = 0; // FailBB's dominator. - AllocaInst *AI = 0; // Place on stack that stores the stack guard. - Value *StackGuardVar = 0; // The stack guard variable. + bool HasPrologue = false; + bool SupportsSelectionDAGSP = + EnableSelectionDAGSP && !TM->Options.EnableFastISel; + AllocaInst *AI = 0; // Place on stack that stores the stack guard. + Value *StackGuardVar = 0; // The stack guard variable. - for (Function::iterator I = F->begin(), E = F->end(); I != E; ) { + for (Function::iterator I = F->begin(), E = F->end(); I != E;) { BasicBlock *BB = I++; ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); - if (!RI) continue; + if (!RI) + continue; - if (!FailBB) { - // Insert code into the entry block that stores the __stack_chk_guard - // variable onto the stack: - // - // entry: - // StackGuardSlot = alloca i8* - // StackGuard = load __stack_chk_guard - // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) - // - PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); - unsigned AddressSpace, Offset; - if (TLI->getStackCookieLocation(AddressSpace, Offset)) { - Constant *OffsetVal = - ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - - StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, - PointerType::get(PtrTy, AddressSpace)); + if (!HasPrologue) { + HasPrologue = true; + SupportsSelectionDAGSP &= + CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar); + } + + if (SupportsSelectionDAGSP) { + // Since we have a potential tail call, insert the special stack check + // intrinsic. + Instruction *InsertionPt = 0; + if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) { + InsertionPt = CI; } else { - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + InsertionPt = RI; + // At this point we know that BB has a return statement so it *DOES* + // have a terminator. + assert(InsertionPt != 0 && "BB must have a terminator instruction at " + "this point."); } - BasicBlock &Entry = F->getEntryBlock(); - Instruction *InsPt = &Entry.front(); - - AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt); - LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt); + Function *Intrinsic = + Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck); + CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt); - Value *Args[] = { LI, AI }; - CallInst:: - Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), - Args, "", InsPt); - - // Create the basic block to jump to when the guard check fails. - FailBB = CreateFailBB(); - } + } else { + // If we do not support SelectionDAG based tail calls, generate IR level + // tail calls. + // + // For each block with a return instruction, convert this: + // + // return: + // ... + // ret ... + // + // into this: + // + // return: + // ... + // %1 = load __stack_chk_guard + // %2 = load StackGuardSlot + // %3 = cmp i1 %1, %2 + // br i1 %3, label %SP_return, label %CallStackCheckFailBlk + // + // SP_return: + // ret ... + // + // CallStackCheckFailBlk: + // call void @__stack_chk_fail() + // unreachable + + // Create the FailBB. We duplicate the BB every time since the MI tail + // merge pass will merge together all of the various BB into one including + // fail BB generated by the stack protector pseudo instruction. + BasicBlock *FailBB = CreateFailBB(); + + // Split the basic block before the return instruction. + BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + + // Update the dominator tree if we need to. + if (DT && DT->isReachableFromEntry(BB)) { + DT->addNewBlock(NewBB, BB); + DT->addNewBlock(FailBB, BB); + } - // For each block with a return instruction, convert this: - // - // return: - // ... - // ret ... - // - // into this: - // - // return: - // ... - // %1 = load __stack_chk_guard - // %2 = load StackGuardSlot - // %3 = cmp i1 %1, %2 - // br i1 %3, label %SP_return, label %CallStackCheckFailBlk - // - // SP_return: - // ret ... - // - // CallStackCheckFailBlk: - // call void @__stack_chk_fail() - // unreachable + // Remove default branch instruction to the new BB. + BB->getTerminator()->eraseFromParent(); - // Split the basic block before the return instruction. - BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + // Move the newly created basic block to the point right after the old + // basic block so that it's in the "fall through" position. + NewBB->moveAfter(BB); - if (DT && DT->isReachableFromEntry(BB)) { - DT->addNewBlock(NewBB, BB); - FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB; + // Generate the stack protector instructions in the old basic block. + IRBuilder<> B(BB); + LoadInst *LI1 = B.CreateLoad(StackGuardVar); + LoadInst *LI2 = B.CreateLoad(AI); + Value *Cmp = B.CreateICmpEQ(LI1, LI2); + B.CreateCondBr(Cmp, NewBB, FailBB); } - - // Remove default branch instruction to the new BB. - BB->getTerminator()->eraseFromParent(); - - // Move the newly created basic block to the point right after the old basic - // block so that it's in the "fall through" position. - NewBB->moveAfter(BB); - - // Generate the stack protector instructions in the old basic block. - LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB); - LoadInst *LI2 = new LoadInst(AI, "", true, BB); - ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, ""); - BranchInst::Create(NewBB, FailBB, Cmp, BB); } // Return if we didn't modify any basic blocks. I.e., there are no return // statements in the function. - if (!FailBB) return false; - - if (DT && FailBBDom) - DT->addNewBlock(FailBB, FailBBDom); + if (!HasPrologue) + return false; return true; } @@ -359,12 +452,20 @@ bool StackProtector::InsertStackProtectors() { /// CreateFailBB - Create a basic block to jump to when the stack protector /// check fails. BasicBlock *StackProtector::CreateFailBB() { - BasicBlock *FailBB = BasicBlock::Create(F->getContext(), - "CallStackCheckFailBlk", F); - Constant *StackChkFail = - M->getOrInsertFunction("__stack_chk_fail", - Type::getVoidTy(F->getContext()), NULL); - CallInst::Create(StackChkFail, "", FailBB); - new UnreachableInst(F->getContext(), FailBB); + LLVMContext &Context = F->getContext(); + BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F); + IRBuilder<> B(FailBB); + if (Trip.getOS() == llvm::Triple::OpenBSD) { + Constant *StackChkFail = M->getOrInsertFunction( + "__stack_smash_handler", Type::getVoidTy(Context), + Type::getInt8PtrTy(Context), NULL); + + B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH")); + } else { + Constant *StackChkFail = M->getOrInsertFunction( + "__stack_chk_fail", Type::getVoidTy(Context), NULL); + B.CreateCall(StackChkFail); + } + B.CreateUnreachable(); return FailBB; } diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index f951561..9f44df8 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -14,20 +14,20 @@ #define DEBUG_TYPE "stackslotcoloring" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include <vector> @@ -48,13 +48,16 @@ namespace { LiveStacks* LS; MachineFrameInfo *MFI; const TargetInstrInfo *TII; - const MachineLoopInfo *loopInfo; + const MachineBlockFrequencyInfo *MBFI; // SSIntervals - Spill slot intervals. std::vector<LiveInterval*> SSIntervals; - // SSRefs - Keep a list of frame index references for each spill slot. - SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs; + // SSRefs - Keep a list of MachineMemOperands for each spill slot. + // MachineMemOperands can be shared between instructions, so we need + // to be careful that renames like [FI0, FI1] -> [FI1, FI2] do not + // become FI0 -> FI1 -> FI2. + SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs; // OrigAlignments - Alignments of stack objects before coloring. SmallVector<unsigned, 16> OrigAlignments; @@ -89,8 +92,8 @@ namespace { AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveStacks>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -103,7 +106,7 @@ namespace { bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); bool ColorSlots(MachineFunction &MF); - void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, + void RewriteInstruction(MachineInstr *MI, SmallVectorImpl<int> &SlotMapping, MachineFunction &MF); bool RemoveDeadStores(MachineBasicBlock* MBB); }; @@ -139,7 +142,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = &*MBBI; - unsigned loopDepth = loopInfo->getLoopDepth(MBB); + BlockFrequency Freq = MBFI->getBlockFreq(MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); MII != EE; ++MII) { MachineInstr *MI = &*MII; @@ -154,8 +157,19 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { continue; LiveInterval &li = LS->getInterval(FI); if (!MI->isDebugValue()) - li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth); - SSRefs[FI].push_back(MI); + li.weight += LiveIntervals::getSpillWeight(false, true, Freq); + } + for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), + EE = MI->memoperands_end(); MMOI != EE; ++MMOI) { + MachineMemOperand *MMO = *MMOI; + if (const Value *V = MMO->getValue()) { + if (const FixedStackPseudoSourceValue *FSV = + dyn_cast<FixedStackPseudoSourceValue>(V)) { + int FI = FSV->getFrameIndex(); + if (FI >= 0) + SSRefs[FI].push_back(MMO); + } + } } } } @@ -197,7 +211,7 @@ void StackSlotColoring::InitializeSlots() { /// LiveIntervals that have already been assigned to the specified color. bool StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { - const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color]; + const SmallVectorImpl<LiveInterval *> &OtherLIs = Assignments[Color]; for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) { LiveInterval *OtherLI = OtherLIs[i]; if (OtherLI->overlaps(*li)) @@ -291,16 +305,26 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { if (!Changed) return false; - // Rewrite all MO_FrameIndex operands. - SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs()); + // Rewrite all MachineMemOperands. for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) { int NewFI = SlotMapping[SS]; if (NewFI == -1 || (NewFI == (int)SS)) continue; - SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS]; - for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) - RewriteInstruction(RefMIs[i], SS, NewFI, MF); + const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); + SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS]; + for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i) + RefMMOs[i]->setValue(NewSV); + } + + // Rewrite all MO_FrameIndex operands. Look for dead stores. + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = &*MBBI; + for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); + MII != EE; ++MII) + RewriteInstruction(MII, SlotMapping, MF); + RemoveDeadStores(MBB); } // Delete unused stack slots. @@ -315,28 +339,24 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { /// RewriteInstruction - Rewrite specified instruction by replacing references /// to old frame index with new one. -void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, - int NewFI, MachineFunction &MF) { +void StackSlotColoring::RewriteInstruction(MachineInstr *MI, + SmallVectorImpl<int> &SlotMapping, + MachineFunction &MF) { // Update the operands. for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isFI()) continue; - int FI = MO.getIndex(); - if (FI != OldFI) + int OldFI = MO.getIndex(); + if (OldFI < 0) + continue; + int NewFI = SlotMapping[OldFI]; + if (NewFI == -1 || NewFI == OldFI) continue; MO.setIndex(NewFI); } - // Update the memory references. This changes the MachineMemOperands - // directly. They may be in use by multiple instructions, however all - // instructions using OldFI are being rewritten to use NewFI. - const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI); - const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); - for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), - E = MI->memoperands_end(); I != E; ++I) - if ((*I)->getValue() == OldSV) - (*I)->setValue(NewSV); + // The MachineMemOperands have already been updated. } @@ -357,10 +377,19 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { if (DCELimit != -1 && (int)NumDead >= DCELimit) break; + int FirstSS, SecondSS; + if (TII->isStackSlotCopy(I, FirstSS, SecondSS) && + FirstSS == SecondSS && + FirstSS != -1) { + ++NumDead; + changed = true; + toErase.push_back(I); + continue; + } + MachineBasicBlock::iterator NextMI = llvm::next(I); if (NextMI == MBB->end()) continue; - int FirstSS, SecondSS; unsigned LoadReg = 0; unsigned StoreReg = 0; if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue; @@ -379,7 +408,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { ++I; } - for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(), + for (SmallVectorImpl<MachineInstr *>::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) (*I)->eraseFromParent(); @@ -396,7 +425,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { MFI = MF.getFrameInfo(); TII = MF.getTarget().getInstrInfo(); LS = &getAnalysis<LiveStacks>(); - loopInfo = &getAnalysis<MachineLoopInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); bool Changed = false; @@ -430,10 +459,5 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { Assignments[i].clear(); Assignments.clear(); - if (Changed) { - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= RemoveDeadStores(I); - } - return Changed; } diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp deleted file mode 100644 index b337c53..0000000 --- a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp +++ /dev/null @@ -1,825 +0,0 @@ -//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass eliminates PHI instructions by aggressively coalescing the copies -// that would be inserted by a naive algorithm and only inserting the copies -// that are necessary. The coalescing technique initially assumes that all -// registers appearing in a PHI instruction do not interfere. It then eliminates -// proven interferences, using dominators to only perform a linear number of -// interference tests instead of the quadratic number of interference tests -// that this would naively require. This is a technique derived from: -// -// Budimlic, et al. Fast copy coalescing and live-range identification. -// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language -// Design and Implementation (Berlin, Germany, June 17 - 19, 2002). -// PLDI '02. ACM, New York, NY, 25-32. -// -// The original implementation constructs a data structure they call a dominance -// forest for this purpose. The dominance forest was shown to be unnecessary, -// as it is possible to emulate the creation and traversal of a dominance forest -// by directly using the dominator tree, rather than actually constructing the -// dominance forest. This technique is explained in: -// -// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code -// Quality and Efficiency, -// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code -// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009). -// CGO '09. IEEE, Washington, DC, 114-125. -// -// Careful implementation allows for all of the dominator forest interference -// checks to be performed at once in a single depth-first traversal of the -// dominator tree, which is what is implemented here. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "strongphielim" -#include "llvm/CodeGen/Passes.h" -#include "PHIEliminationUtils.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" -using namespace llvm; - -namespace { - class StrongPHIElimination : public MachineFunctionPass { - public: - static char ID; // Pass identification, replacement for typeid - StrongPHIElimination() : MachineFunctionPass(ID) { - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage&) const; - bool runOnMachineFunction(MachineFunction&); - - private: - /// This struct represents a single node in the union-find data structure - /// representing the variable congruence classes. There is one difference - /// from a normal union-find data structure. We steal two bits from the parent - /// pointer . One of these bits is used to represent whether the register - /// itself has been isolated, and the other is used to represent whether the - /// PHI with that register as its destination has been isolated. - /// - /// Note that this leads to the strange situation where the leader of a - /// congruence class may no longer logically be a member, due to being - /// isolated. - struct Node { - enum Flags { - kRegisterIsolatedFlag = 1, - kPHIIsolatedFlag = 2 - }; - Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); } - - Node *getLeader(); - - PointerIntPair<Node*, 2> parent; - unsigned value; - unsigned rank; - }; - - /// Add a register in a new congruence class containing only itself. - void addReg(unsigned); - - /// Join the congruence classes of two registers. This function is biased - /// towards the left argument, i.e. after - /// - /// addReg(r2); - /// unionRegs(r1, r2); - /// - /// the leader of the unioned congruence class is the same as the leader of - /// r1's congruence class prior to the union. This is actually relied upon - /// in the copy insertion code. - void unionRegs(unsigned, unsigned); - - /// Get the color of a register. The color is 0 if the register has been - /// isolated. - unsigned getRegColor(unsigned); - - // Isolate a register. - void isolateReg(unsigned); - - /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been - /// isolated. Otherwise, it is the original color of its destination and - /// all of its operands (before they were isolated, if they were). - unsigned getPHIColor(MachineInstr*); - - /// Isolate a PHI. - void isolatePHI(MachineInstr*); - - /// Traverses a basic block, splitting any interferences found between - /// registers in the same congruence class. It takes two DenseMaps as - /// arguments that it also updates: CurrentDominatingParent, which maps - /// a color to the register in that congruence class whose definition was - /// most recently seen, and ImmediateDominatingParent, which maps a register - /// to the register in the same congruence class that most immediately - /// dominates it. - /// - /// This function assumes that it is being called in a depth-first traversal - /// of the dominator tree. - void SplitInterferencesForBasicBlock( - MachineBasicBlock&, - DenseMap<unsigned, unsigned> &CurrentDominatingParent, - DenseMap<unsigned, unsigned> &ImmediateDominatingParent); - - // Lowers a PHI instruction, inserting copies of the source and destination - // registers as necessary. - void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*); - - // Merges the live interval of Reg into NewReg and renames Reg to NewReg - // everywhere that Reg appears. Requires Reg and NewReg to have non- - // overlapping lifetimes. - void MergeLIsAndRename(unsigned Reg, unsigned NewReg); - - MachineRegisterInfo *MRI; - const TargetInstrInfo *TII; - MachineDominatorTree *DT; - LiveIntervals *LI; - - BumpPtrAllocator Allocator; - - DenseMap<unsigned, Node*> RegNodeMap; - - // Maps a basic block to a list of its defs of registers that appear as PHI - // sources. - DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs; - - // Maps a color to a pair of a MachineInstr* and a virtual register, which - // is the operand of that PHI corresponding to the current basic block. - DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor; - - // FIXME: Can these two data structures be combined? Would a std::multimap - // be any better? - - // Stores pairs of predecessor basic blocks and the source registers of - // inserted copy instructions. - typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet; - SrcCopySet InsertedSrcCopySet; - - // Maps pairs of predecessor basic blocks and colors to their defining copy - // instructions. - typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*> - SrcCopyMap; - SrcCopyMap InsertedSrcCopyMap; - - // Maps inserted destination copy registers to their defining copy - // instructions. - typedef DenseMap<unsigned, MachineInstr*> DestCopyMap; - DestCopyMap InsertedDestCopies; - }; - - struct MIIndexCompare { - MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { } - - bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { - return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS); - } - - LiveIntervals *LI; - }; -} // namespace - -STATISTIC(NumPHIsLowered, "Number of PHIs lowered"); -STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted"); -STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted"); - -char StrongPHIElimination::ID = 0; -INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination", - "Eliminate PHI nodes for register allocation, intelligently", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination", - "Eliminate PHI nodes for register allocation, intelligently", false, false) - -char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; - -void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { - // FIXME: This only needs to check from the first terminator, as only the - // first terminator can use a virtual register. - for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) { - assert (RI != MBB->rend()); - MachineInstr *MI = &*RI; - - for (MachineInstr::mop_iterator OI = MI->operands_begin(), - OE = MI->operands_end(); OI != OE; ++OI) { - MachineOperand &MO = *OI; - if (MO.isReg() && MO.isUse() && MO.getReg() == Reg) - return &MO; - } - } -} - -bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { - MRI = &MF.getRegInfo(); - TII = MF.getTarget().getInstrInfo(); - DT = &getAnalysis<MachineDominatorTree>(); - LI = &getAnalysis<LiveIntervals>(); - - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - unsigned DestReg = BBI->getOperand(0).getReg(); - addReg(DestReg); - PHISrcDefs[I].push_back(BBI); - - for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { - MachineOperand &SrcMO = BBI->getOperand(i); - unsigned SrcReg = SrcMO.getReg(); - addReg(SrcReg); - unionRegs(DestReg, SrcReg); - - MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (DefMI) - PHISrcDefs[DefMI->getParent()].push_back(DefMI); - } - } - } - - // Perform a depth-first traversal of the dominator tree, splitting - // interferences amongst PHI-congruence classes. - DenseMap<unsigned, unsigned> CurrentDominatingParent; - DenseMap<unsigned, unsigned> ImmediateDominatingParent; - for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()), - DE = df_end(DT->getRootNode()); DI != DE; ++DI) { - SplitInterferencesForBasicBlock(*DI->getBlock(), - CurrentDominatingParent, - ImmediateDominatingParent); - } - - // Insert copies for all PHI source and destination registers. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - InsertCopiesForPHI(BBI, I); - } - } - - // FIXME: Preserve the equivalence classes during copy insertion and use - // the preversed equivalence classes instead of recomputing them. - RegNodeMap.clear(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - unsigned DestReg = BBI->getOperand(0).getReg(); - addReg(DestReg); - - for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { - unsigned SrcReg = BBI->getOperand(i).getReg(); - addReg(SrcReg); - unionRegs(DestReg, SrcReg); - } - } - } - - DenseMap<unsigned, unsigned> RegRenamingMap; - bool Changed = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - while (BBI != BBE && BBI->isPHI()) { - MachineInstr *PHI = BBI; - - assert(PHI->getNumOperands() > 0); - - unsigned SrcReg = PHI->getOperand(1).getReg(); - unsigned SrcColor = getRegColor(SrcReg); - unsigned NewReg = RegRenamingMap[SrcColor]; - if (!NewReg) { - NewReg = SrcReg; - RegRenamingMap[SrcColor] = SrcReg; - } - MergeLIsAndRename(SrcReg, NewReg); - - unsigned DestReg = PHI->getOperand(0).getReg(); - if (!InsertedDestCopies.count(DestReg)) - MergeLIsAndRename(DestReg, NewReg); - - for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) { - unsigned SrcReg = PHI->getOperand(i).getReg(); - MergeLIsAndRename(SrcReg, NewReg); - } - - ++BBI; - LI->RemoveMachineInstrFromMaps(PHI); - PHI->eraseFromParent(); - Changed = true; - } - } - - // Due to the insertion of copies to split live ranges, the live intervals are - // guaranteed to not overlap, except in one case: an original PHI source and a - // PHI destination copy. In this case, they have the same value and thus don't - // truly intersect, so we merge them into the value live at that point. - // FIXME: Is there some better way we can handle this? - for (DestCopyMap::iterator I = InsertedDestCopies.begin(), - E = InsertedDestCopies.end(); I != E; ++I) { - unsigned DestReg = I->first; - unsigned DestColor = getRegColor(DestReg); - unsigned NewReg = RegRenamingMap[DestColor]; - - LiveInterval &DestLI = LI->getInterval(DestReg); - LiveInterval &NewLI = LI->getInterval(NewReg); - - assert(DestLI.ranges.size() == 1 - && "PHI destination copy's live interval should be a single live " - "range from the beginning of the BB to the copy instruction."); - LiveRange *DestLR = DestLI.begin(); - VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start); - if (!NewVNI) { - NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator()); - MachineInstr *CopyInstr = I->second; - CopyInstr->getOperand(1).setIsKill(true); - } - - LiveRange NewLR(DestLR->start, DestLR->end, NewVNI); - NewLI.addRange(NewLR); - - LI->removeInterval(DestReg); - MRI->replaceRegWith(DestReg, NewReg); - } - - // Adjust the live intervals of all PHI source registers to handle the case - // where the PHIs in successor blocks were the only later uses of the source - // register. - for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(), - E = InsertedSrcCopySet.end(); I != E; ++I) { - MachineBasicBlock *MBB = I->first; - unsigned SrcReg = I->second; - if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)]) - SrcReg = RenamedRegister; - - LiveInterval &SrcLI = LI->getInterval(SrcReg); - - bool isLiveOut = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) { - isLiveOut = true; - break; - } - } - - if (isLiveOut) - continue; - - MachineOperand *LastUse = findLastUse(MBB, SrcReg); - assert(LastUse); - SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); - SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB)); - LastUse->setIsKill(true); - } - - Allocator.Reset(); - RegNodeMap.clear(); - PHISrcDefs.clear(); - InsertedSrcCopySet.clear(); - InsertedSrcCopyMap.clear(); - InsertedDestCopies.clear(); - - return Changed; -} - -void StrongPHIElimination::addReg(unsigned Reg) { - Node *&N = RegNodeMap[Reg]; - if (!N) - N = new (Allocator) Node(Reg); -} - -StrongPHIElimination::Node* -StrongPHIElimination::Node::getLeader() { - Node *N = this; - Node *Parent = parent.getPointer(); - Node *Grandparent = Parent->parent.getPointer(); - - while (Parent != Grandparent) { - N->parent.setPointer(Grandparent); - N = Grandparent; - Parent = Parent->parent.getPointer(); - Grandparent = Parent->parent.getPointer(); - } - - return Parent; -} - -unsigned StrongPHIElimination::getRegColor(unsigned Reg) { - DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg); - if (RI == RegNodeMap.end()) - return 0; - Node *Node = RI->second; - if (Node->parent.getInt() & Node::kRegisterIsolatedFlag) - return 0; - return Node->getLeader()->value; -} - -void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) { - Node *Node1 = RegNodeMap[Reg1]->getLeader(); - Node *Node2 = RegNodeMap[Reg2]->getLeader(); - - if (Node1->rank > Node2->rank) { - Node2->parent.setPointer(Node1->getLeader()); - } else if (Node1->rank < Node2->rank) { - Node1->parent.setPointer(Node2->getLeader()); - } else if (Node1 != Node2) { - Node2->parent.setPointer(Node1->getLeader()); - Node1->rank++; - } -} - -void StrongPHIElimination::isolateReg(unsigned Reg) { - Node *Node = RegNodeMap[Reg]; - Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag); -} - -unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) { - assert(PHI->isPHI()); - - unsigned DestReg = PHI->getOperand(0).getReg(); - Node *DestNode = RegNodeMap[DestReg]; - if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag) - return 0; - - for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { - unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg()); - if (SrcColor) - return SrcColor; - } - return 0; -} - -void StrongPHIElimination::isolatePHI(MachineInstr *PHI) { - assert(PHI->isPHI()); - Node *Node = RegNodeMap[PHI->getOperand(0).getReg()]; - Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag); -} - -/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any -/// interferences found between registers in the same congruence class. It -/// takes two DenseMaps as arguments that it also updates: -/// -/// 1) CurrentDominatingParent, which maps a color to the register in that -/// congruence class whose definition was most recently seen. -/// -/// 2) ImmediateDominatingParent, which maps a register to the register in the -/// same congruence class that most immediately dominates it. -/// -/// This function assumes that it is being called in a depth-first traversal -/// of the dominator tree. -/// -/// The algorithm used here is a generalization of the dominance-based SSA test -/// for two variables. If there are variables a_1, ..., a_n such that -/// -/// def(a_1) dom ... dom def(a_n), -/// -/// then we can test for an interference between any two a_i by only using O(n) -/// interference tests between pairs of variables. If i < j and a_i and a_j -/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1). -/// Thus, in order to test for an interference involving a_i, we need only check -/// for a potential interference with a_i+1. -/// -/// This method can be generalized to arbitrary sets of variables by performing -/// a depth-first traversal of the dominator tree. As we traverse down a branch -/// of the dominator tree, we keep track of the current dominating variable and -/// only perform an interference test with that variable. However, when we go to -/// another branch of the dominator tree, the definition of the current dominating -/// variable may no longer dominate the current block. In order to correct this, -/// we need to use a stack of past choices of the current dominating variable -/// and pop from this stack until we find a variable whose definition actually -/// dominates the current block. -/// -/// There will be one push on this stack for each variable that has become the -/// current dominating variable, so instead of using an explicit stack we can -/// simply associate the previous choice for a current dominating variable with -/// the new choice. This works better in our implementation, where we test for -/// interference in multiple distinct sets at once. -void -StrongPHIElimination::SplitInterferencesForBasicBlock( - MachineBasicBlock &MBB, - DenseMap<unsigned, unsigned> &CurrentDominatingParent, - DenseMap<unsigned, unsigned> &ImmediateDominatingParent) { - // Sort defs by their order in the original basic block, as the code below - // assumes that it is processing definitions in dominance order. - std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB]; - std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI)); - - for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(), - BBE = DefInstrs.end(); BBI != BBE; ++BBI) { - for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(), - E = (*BBI)->operands_end(); I != E; ++I) { - const MachineOperand &MO = *I; - - // FIXME: This would be faster if it were possible to bail out of checking - // an instruction's operands after the explicit defs, but this is incorrect - // for variadic instructions, which may appear before register allocation - // in the future. - if (!MO.isReg() || !MO.isDef()) - continue; - - unsigned DestReg = MO.getReg(); - if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg)) - continue; - - // If the virtual register being defined is not used in any PHI or has - // already been isolated, then there are no more interferences to check. - unsigned DestColor = getRegColor(DestReg); - if (!DestColor) - continue; - - // The input to this pass sometimes is not in SSA form in every basic - // block, as some virtual registers have redefinitions. We could eliminate - // this by fixing the passes that generate the non-SSA code, or we could - // handle it here by tracking defining machine instructions rather than - // virtual registers. For now, we just handle the situation conservatively - // in a way that will possibly lead to false interferences. - unsigned &CurrentParent = CurrentDominatingParent[DestColor]; - unsigned NewParent = CurrentParent; - if (NewParent == DestReg) - continue; - - // Pop registers from the stack represented by ImmediateDominatingParent - // until we find a parent that dominates the current instruction. - while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI) - || !getRegColor(NewParent))) - NewParent = ImmediateDominatingParent[NewParent]; - - // If NewParent is nonzero, then its definition dominates the current - // instruction, so it is only necessary to check for the liveness of - // NewParent in order to check for an interference. - if (NewParent - && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) { - // If there is an interference, always isolate the new register. This - // could be improved by using a heuristic that decides which of the two - // registers to isolate. - isolateReg(DestReg); - CurrentParent = NewParent; - } else { - // If there is no interference, update ImmediateDominatingParent and set - // the CurrentDominatingParent for this color to the current register. - ImmediateDominatingParent[DestReg] = NewParent; - CurrentParent = DestReg; - } - } - } - - // We now walk the PHIs in successor blocks and check for interferences. This - // is necessary because the use of a PHI's operands are logically contained in - // the predecessor block. The def of a PHI's destination register is processed - // along with the other defs in a basic block. - - CurrentPHIForColor.clear(); - - for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), - SE = MBB.succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - MachineInstr *PHI = BBI; - - // If a PHI is already isolated, either by being isolated directly or - // having all of its operands isolated, ignore it. - unsigned Color = getPHIColor(PHI); - if (!Color) - continue; - - // Find the index of the PHI operand that corresponds to this basic block. - unsigned PredIndex; - for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) { - if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB) - break; - } - assert(PredIndex < PHI->getNumOperands()); - unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg(); - - // Pop registers from the stack represented by ImmediateDominatingParent - // until we find a parent that dominates the current instruction. - unsigned &CurrentParent = CurrentDominatingParent[Color]; - unsigned NewParent = CurrentParent; - while (NewParent - && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB) - || !getRegColor(NewParent))) - NewParent = ImmediateDominatingParent[NewParent]; - CurrentParent = NewParent; - - // If there is an interference with a register, always isolate the - // register rather than the PHI. It is also possible to isolate the - // PHI, but that introduces copies for all of the registers involved - // in that PHI. - if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB) - && NewParent != PredOperandReg) - isolateReg(NewParent); - - std::pair<MachineInstr*, unsigned> - &CurrentPHI = CurrentPHIForColor[Color]; - - // If two PHIs have the same operand from every shared predecessor, then - // they don't actually interfere. Otherwise, isolate the current PHI. This - // could possibly be improved, e.g. we could isolate the PHI with the - // fewest operands. - if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) - isolatePHI(PHI); - else - CurrentPHI = std::make_pair(PHI, PredOperandReg); - } - } -} - -void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, - MachineBasicBlock *MBB) { - assert(PHI->isPHI()); - ++NumPHIsLowered; - unsigned PHIColor = getPHIColor(PHI); - - for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { - MachineOperand &SrcMO = PHI->getOperand(i); - - // If a source is defined by an implicit def, there is no need to insert a - // copy in the predecessor. - if (SrcMO.isUndef()) - continue; - - unsigned SrcReg = SrcMO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && - "Machine PHI Operands must all be virtual registers!"); - - MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB(); - unsigned SrcColor = getRegColor(SrcReg); - - // If neither the PHI nor the operand were isolated, then we only need to - // set the phi-kill flag on the VNInfo at this PHI. - if (PHIColor && SrcColor == PHIColor) { - LiveInterval &SrcInterval = LI->getInterval(SrcReg); - SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); - VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); - (void)SrcVNI; - assert(SrcVNI); - continue; - } - - unsigned CopyReg = 0; - if (PHIColor) { - SrcCopyMap::const_iterator I - = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor)); - CopyReg - = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0; - } - - if (!CopyReg) { - const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); - CopyReg = MRI->createVirtualRegister(RC); - - MachineBasicBlock::iterator - CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); - unsigned SrcSubReg = SrcMO.getSubReg(); - MachineInstr *CopyInstr = BuildMI(*PredBB, - CopyInsertPoint, - PHI->getDebugLoc(), - TII->get(TargetOpcode::COPY), - CopyReg).addReg(SrcReg, 0, SrcSubReg); - LI->InsertMachineInstrInMaps(CopyInstr); - ++NumSrcCopiesInserted; - - // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for - // the newly added range. - LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); - InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg)); - - addReg(CopyReg); - if (PHIColor) { - unionRegs(PHIColor, CopyReg); - assert(getRegColor(CopyReg) != CopyReg); - } else { - PHIColor = CopyReg; - assert(getRegColor(CopyReg) == CopyReg); - } - - // Insert into map if not already there. - InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor), - CopyInstr)); - } - - SrcMO.setReg(CopyReg); - - // If SrcReg is not live beyond the PHI, trim its interval so that it is no - // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are - // processed later, but this is still correct to do at this point because we - // never rely on LiveIntervals being correct while inserting copies. - // FIXME: Should this just count uses at PHIs like the normal PHIElimination - // pass does? - LiveInterval &SrcLI = LI->getInterval(SrcReg); - SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); - SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); - if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) - SrcLI.removeRange(MBBStartIndex, PHIIndex, true); - } - - unsigned DestReg = PHI->getOperand(0).getReg(); - unsigned DestColor = getRegColor(DestReg); - - if (PHIColor && DestColor == PHIColor) { - LiveInterval &DestLI = LI->getInterval(DestReg); - - // Set the phi-def flag for the VN at this PHI. - SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); - assert(DestVNI); - - // Prior to PHI elimination, the live ranges of PHIs begin at their defining - // instruction. After PHI elimination, PHI instructions are replaced by VNs - // with the phi-def flag set, and the live ranges of these VNs start at the - // beginning of the basic block. - SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); - DestVNI->def = MBBStartIndex; - DestLI.addRange(LiveRange(MBBStartIndex, - PHIIndex.getRegSlot(), - DestVNI)); - return; - } - - const TargetRegisterClass *RC = MRI->getRegClass(DestReg); - unsigned CopyReg = MRI->createVirtualRegister(RC); - - MachineInstr *CopyInstr = BuildMI(*MBB, - MBB->SkipPHIsAndLabels(MBB->begin()), - PHI->getDebugLoc(), - TII->get(TargetOpcode::COPY), - DestReg).addReg(CopyReg); - LI->InsertMachineInstrInMaps(CopyInstr); - PHI->getOperand(0).setReg(CopyReg); - ++NumDestCopiesInserted; - - // Add the region from the beginning of MBB to the copy instruction to - // CopyReg's live interval, and give the VNInfo the phidef flag. - LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg); - SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); - SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); - VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, - LI->getVNInfoAllocator()); - CopyLI.addRange(LiveRange(MBBStartIndex, - DestCopyIndex.getRegSlot(), - CopyVNI)); - - // Adjust DestReg's live interval to adjust for its new definition at - // CopyInstr. - LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); - SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot()); - - VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); - assert(DestVNI); - DestVNI->def = DestCopyIndex.getRegSlot(); - - InsertedDestCopies[CopyReg] = CopyInstr; -} - -void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) { - if (Reg == NewReg) - return; - - LiveInterval &OldLI = LI->getInterval(Reg); - LiveInterval &NewLI = LI->getInterval(NewReg); - - // Merge the live ranges of the two registers. - DenseMap<VNInfo*, VNInfo*> VNMap; - for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end(); - LRI != LRE; ++LRI) { - LiveRange OldLR = *LRI; - VNInfo *OldVN = OldLR.valno; - - VNInfo *&NewVN = VNMap[OldVN]; - if (!NewVN) { - NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator()); - VNMap[OldVN] = NewVN; - } - - LiveRange LR(OldLR.start, OldLR.end, NewVN); - NewLI.addRange(LR); - } - - // Remove the LiveInterval for the register being renamed and replace all - // of its defs and uses with the new register. - LI->removeInterval(Reg); - MRI->replaceRegWith(Reg, NewReg); -} diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index 1ec8817..ff0181e 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -86,7 +86,7 @@ namespace { void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, + SmallVectorImpl<std::pair<unsigned,unsigned> > &Copies, const DenseSet<unsigned> &UsedByPhi, bool Remove); void DuplicateInstruction(MachineInstr *MI, @@ -96,7 +96,7 @@ namespace { DenseMap<unsigned, unsigned> &LocalVRMap, const DenseSet<unsigned> &UsedByPhi); void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallSetVector<MachineBasicBlock*, 8> &Succs); bool TailDuplicateBlocks(MachineFunction &MF); bool shouldTailDuplicate(const MachineFunction &MF, @@ -104,14 +104,14 @@ namespace { bool isSimpleBB(MachineBasicBlock *TailBB); bool canCompletelyDuplicateBB(MachineBasicBlock &BB); bool duplicateSimpleBB(MachineBasicBlock *TailBB, - SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, const DenseSet<unsigned> &RegsUsedByPhi, - SmallVector<MachineInstr*, 16> &Copies); + SmallVectorImpl<MachineInstr *> &Copies); bool TailDuplicate(MachineBasicBlock *TailBB, bool IsSimple, MachineFunction &MF, - SmallVector<MachineBasicBlock*, 8> &TDBBs, - SmallVector<MachineInstr*, 16> &Copies); + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallVectorImpl<MachineInstr *> &Copies); bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, bool IsSimple, MachineFunction &MF); @@ -382,13 +382,11 @@ void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, /// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. /// Remember the source register that's contributed by PredBB and update SSA /// update map. -void TailDuplicatePass::ProcessPHI(MachineInstr *MI, - MachineBasicBlock *TailBB, - MachineBasicBlock *PredBB, - DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, - const DenseSet<unsigned> &RegsUsedByPhi, - bool Remove) { +void TailDuplicatePass::ProcessPHI( + MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVectorImpl<std::pair<unsigned, unsigned> > &Copies, + const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { unsigned DefReg = MI->getOperand(0).getReg(); unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); assert(SrcOpIdx && "Unable to find matching PHI source?"); @@ -452,7 +450,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, /// instructions in them accordingly. void TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallSetVector<MachineBasicBlock*,8> &Succs) { for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(), SE = Succs.end(); SI != SE; ++SI) { @@ -640,8 +638,6 @@ bothUsedInPHI(const MachineBasicBlock &A, bool TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { - SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end()); - for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), PE = BB.pred_end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; @@ -662,9 +658,9 @@ TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { bool TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, - SmallVector<MachineBasicBlock*, 8> &TDBBs, - const DenseSet<unsigned> &UsedByPhi, - SmallVector<MachineInstr*, 16> &Copies) { + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + const DenseSet<unsigned> &UsedByPhi, + SmallVectorImpl<MachineInstr *> &Copies) { SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(), TailBB->succ_end()); SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), @@ -742,8 +738,8 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, bool IsSimple, MachineFunction &MF, - SmallVector<MachineBasicBlock*, 8> &TDBBs, - SmallVector<MachineInstr*, 16> &Copies) { + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallVectorImpl<MachineInstr *> &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); DenseSet<unsigned> UsedByPhi; diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index 20eb918..bf4fd65 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" @@ -276,6 +277,36 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, return false; } +bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, + unsigned SubIdx, unsigned &Size, + unsigned &Offset, + const TargetMachine *TM) const { + if (!SubIdx) { + Size = RC->getSize(); + Offset = 0; + return true; + } + unsigned BitSize = TM->getRegisterInfo()->getSubRegIdxSize(SubIdx); + // Convert bit size to byte size to be consistent with + // MCRegisterClass::getSize(). + if (BitSize % 8) + return false; + + int BitOffset = TM->getRegisterInfo()->getSubRegIdxOffset(SubIdx); + if (BitOffset < 0 || BitOffset % 8) + return false; + + Size = BitSize /= 8; + Offset = (unsigned)BitOffset / 8; + + assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); + + if (!TM->getDataLayout()->isLittleEndian()) { + Offset = RC->getSize() - (Offset + Size); + } + return true; +} + void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, @@ -364,6 +395,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, // Ask the target to do the actual folding. if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { + NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || NewMI->mayStore()) && @@ -424,9 +456,19 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, NewMI = MBB.insert(MI, NewMI); // Copy the memoperands from the load to the folded instruction. - NewMI->setMemRefs(LoadMI->memoperands_begin(), - LoadMI->memoperands_end()); - + if (MI->memoperands_empty()) { + NewMI->setMemRefs(LoadMI->memoperands_begin(), + LoadMI->memoperands_end()); + } + else { + // Handle the rare case of folding multiple loads. + NewMI->setMemRefs(MI->memoperands_begin(), + MI->memoperands_end()); + for (MachineInstr::mmo_iterator I = LoadMI->memoperands_begin(), + E = LoadMI->memoperands_end(); I != E; ++I) { + NewMI->addMemOperand(MF, *I); + } + } return NewMI; } @@ -630,6 +672,10 @@ unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, return 1; } +unsigned TargetInstrInfo::getPredicationCost(const MachineInstr *) const { + return 0; +} + unsigned TargetInstrInfo:: getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, @@ -668,27 +714,13 @@ getOperandLatency(const InstrItineraryData *ItinData, /// lookup, do so. Otherwise return -1. int TargetInstrInfo::computeDefOperandLatency( const InstrItineraryData *ItinData, - const MachineInstr *DefMI, bool FindMin) const { + const MachineInstr *DefMI) const { // Let the target hook getInstrLatency handle missing itineraries. if (!ItinData) return getInstrLatency(ItinData, DefMI); - // Return a latency based on the itinerary properties and defining instruction - // if possible. Some common subtargets don't require per-operand latency, - // especially for minimum latencies. - if (FindMin) { - // If MinLatency is valid, call getInstrLatency. This uses Stage latency if - // it exists before defaulting to MinLatency. - if (ItinData->SchedModel->MinLatency >= 0) - return getInstrLatency(ItinData, DefMI); - - // If MinLatency is invalid, OperandLatency is interpreted as MinLatency. - // For empty itineraries, short-cirtuit the check and default to one cycle. - if (ItinData->isEmpty()) - return 1; - } - else if(ItinData->isEmpty()) + if(ItinData->isEmpty()) return defaultDefLatency(ItinData->SchedModel, DefMI); // ...operand lookup required @@ -709,10 +741,9 @@ int TargetInstrInfo::computeDefOperandLatency( unsigned TargetInstrInfo:: computeOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx, - bool FindMin) const { + const MachineInstr *UseMI, unsigned UseIdx) const { - int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin); + int DefLatency = computeDefOperandLatency(ItinData, DefMI); if (DefLatency >= 0) return DefLatency; @@ -732,8 +763,7 @@ computeOperandLatency(const InstrItineraryData *ItinData, unsigned InstrLatency = getInstrLatency(ItinData, DefMI); // Expected latency is the max of the stage latency and itinerary props. - if (!FindMin) - InstrLatency = std::max(InstrLatency, - defaultDefLatency(ItinData->SchedModel, DefMI)); + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index 8074d16..30305af 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -191,6 +191,11 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::ROUND_F32] = "roundf"; + Names[RTLIB::ROUND_F64] = "round"; + Names[RTLIB::ROUND_F80] = "roundl"; + Names[RTLIB::ROUND_F128] = "roundl"; + Names[RTLIB::ROUND_PPCF128] = "roundl"; Names[RTLIB::FLOOR_F32] = "floorf"; Names[RTLIB::FLOOR_F64] = "floor"; Names[RTLIB::FLOOR_F80] = "floorl"; @@ -313,34 +318,62 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = "__sync_val_compare_and_swap_16"; Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = "__sync_lock_test_and_set_16"; Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; + Names[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16"; Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; + Names[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16"; Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; + Names[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16"; Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; + Names[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16"; Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; + Names[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16"; Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; + Names[RTLIB::SYNC_FETCH_AND_NAND_16] = "__sync_fetch_and_nand_16"; + Names[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1"; + Names[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2"; + Names[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4"; + Names[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8"; + Names[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_2] = "__sync_fetch_and_umax_2"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_16] = "__sync_fetch_and_umax_16"; + Names[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1"; + Names[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2"; + Names[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4"; + Names[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8"; + Names[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16"; if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) { Names[RTLIB::SINCOS_F32] = "sincosf"; @@ -356,6 +389,13 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SINCOS_F128] = 0; Names[RTLIB::SINCOS_PPCF128] = 0; } + + if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) { + Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail"; + } else { + // These are generally not available. + Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = 0; + } } /// InitLibcallCallingConvs - Set default libcall CallingConvs. @@ -624,7 +664,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, // Perform these initializations only once. IsLittleEndian = TD->isLittleEndian(); - PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = 4; @@ -682,6 +721,14 @@ void TargetLoweringBase::initActions() { // These operations default to expand. setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + + // These library functions default to expand. + setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand); + + // These operations default to expand for vector types. + if (VT >= MVT::FIRST_VECTOR_VALUETYPE && + VT <= MVT::LAST_VECTOR_VALUETYPE) + setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. @@ -747,6 +794,19 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); } +MVT TargetLoweringBase::getPointerTy(uint32_t AS) const { + return MVT::getIntegerVT(getPointerSizeInBits(AS)); +} + +unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const { + return TD->getPointerSizeInBits(AS); +} + +unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { + assert(Ty->isPointerTy()); + return getPointerSizeInBits(Ty->getPointerAddressSpace()); +} + MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { return MVT::getIntegerVT(8*TD->getPointerSize(0)); } @@ -1033,7 +1093,7 @@ void TargetLoweringBase::computeRegisterProperties() { } } -EVT TargetLoweringBase::getSetCCResultType(EVT VT) const { +EVT TargetLoweringBase::getSetCCResultType(LLVMContext &, EVT VT) const { assert(!VT.isVector() && "No default SetCC type for vectors!"); return getPointerTy(0).SimpleTy; } @@ -1162,7 +1222,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0)); + Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0)); } } @@ -1228,6 +1288,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case PtrToInt: return ISD::BITCAST; case IntToPtr: return ISD::BITCAST; case BitCast: return ISD::BITCAST; + case AddrSpaceCast: return ISD::ADDRSPACECAST; case ICmp: return ISD::SETCC; case FCmp: return ISD::SETCC; case PHI: return 0; diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 7e7359a..59d7b57 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -52,10 +52,10 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, default: report_fatal_error("We do not support this DWARF encoding yet!"); case dwarf::DW_EH_PE_absptr: - return Mang->getSymbol(GV); + return getSymbol(*Mang, GV); case dwarf::DW_EH_PE_pcrel: { return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + - Mang->getSymbol(GV)->getName()); + getSymbol(*Mang, GV)->getName()); } } } @@ -104,7 +104,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -252,7 +252,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Prefix = getSectionPrefixForGlobal(Kind); SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); Name.append(Sym->getName().begin(), Sym->getName().end()); StringRef Group = ""; unsigned Flags = getELFSectionFlags(Kind); @@ -523,6 +523,11 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { + + // Handle thread local data. + if (Kind.isThreadBSS()) return TLSBSSSection; + if (Kind.isThreadData()) return TLSDataSection; + if (Kind.isText()) return GV->isWeakForLinker() ? TextCoalSection : TextSection; @@ -575,10 +580,6 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isBSSLocal()) return DataBSSSection; - // Handle thread local data. - if (Kind.isThreadBSS()) return TLSBSSSection; - if (Kind.isThreadData()) return TLSDataSection; - // Otherwise, just drop the variable in the normal data section. return DataSection; } @@ -613,7 +614,7 @@ shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { // FIXME: ObjC metadata is currently emitted as internal symbols that have // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and // this horrible hack can go away. - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l') return false; } @@ -642,7 +643,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -671,7 +672,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -726,14 +727,14 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, if (GV->isWeakForLinker()) { Selection = COFF::IMAGE_COMDAT_SELECT_ANY; Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - MCSymbol *Sym = Mang->getSymbol(GV); Name.append("$"); - Name.append(Sym->getName().begin() + 1, Sym->getName().end()); + Mang->getNameWithPrefix(Name, GV, false, false); } return getContext().getCOFFSection(Name, Characteristics, - Selection, - Kind); + Kind, + "", + Selection); } static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { @@ -761,24 +762,29 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (GV->isWeakForLinker()) { const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - MCSymbol *Sym = Mang->getSymbol(GV); - Name.append(Sym->getName().begin() + 1, Sym->getName().end()); + Mang->getNameWithPrefix(Name, GV, false, false); unsigned Characteristics = getCOFFSectionFlags(Kind); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; return getContext().getCOFFSection(Name.str(), Characteristics, - COFF::IMAGE_COMDAT_SELECT_ANY, Kind); + Kind, "", COFF::IMAGE_COMDAT_SELECT_ANY); } if (Kind.isText()) - return getTextSection(); + return TextSection; if (Kind.isThreadLocal()) - return getTLSDataSection(); + return TLSDataSection; - return getDataSection(); + if (Kind.isReadOnly()) + return ReadOnlySection; + + if (Kind.isBSS()) + return BSSSection; + + return DataSection; } void TargetLoweringObjectFileCOFF:: diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 435a5e7..f7bf86b 100644 --- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Function.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetOptions.h" @@ -21,7 +22,8 @@ using namespace llvm; bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { // Check to see if we should eliminate non-leaf frame pointers and then // check to see if we should eliminate all frame pointers. - if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { + if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf") && + !NoFramePointerElim) { const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->hasCalls(); } @@ -49,30 +51,3 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const { StringRef TargetOptions::getTrapFunctionName() const { return TrapFuncName; } - -bool TargetOptions::operator==(const TargetOptions &TO) { -#define ARE_EQUAL(X) X == TO.X - return - ARE_EQUAL(UnsafeFPMath) && - ARE_EQUAL(NoInfsFPMath) && - ARE_EQUAL(NoNaNsFPMath) && - ARE_EQUAL(HonorSignDependentRoundingFPMathOption) && - ARE_EQUAL(UseSoftFloat) && - ARE_EQUAL(NoZerosInBSS) && - ARE_EQUAL(JITExceptionHandling) && - ARE_EQUAL(JITEmitDebugInfo) && - ARE_EQUAL(JITEmitDebugInfoToDisk) && - ARE_EQUAL(GuaranteedTailCallOpt) && - ARE_EQUAL(DisableTailCalls) && - ARE_EQUAL(StackAlignmentOverride) && - ARE_EQUAL(RealignStack) && - ARE_EQUAL(SSPBufferSize) && - ARE_EQUAL(EnableFastISel) && - ARE_EQUAL(PositionIndependentExecutable) && - ARE_EQUAL(EnableSegmentedStacks) && - ARE_EQUAL(UseInitArray) && - ARE_EQUAL(TrapFuncName) && - ARE_EQUAL(FloatABIType) && - ARE_EQUAL(AllowFPOpFusion); -#undef ARE_EQUAL -} diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 84b4bfc..5a15243 100644 --- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -23,10 +23,12 @@ using namespace llvm; TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, regclass_iterator RCB, regclass_iterator RCE, const char *const *SRINames, - const unsigned *SRILaneMasks) + const unsigned *SRILaneMasks, + unsigned SRICoveringLanes) : InfoDesc(ID), SubRegIndexNames(SRINames), SubRegIndexLaneMasks(SRILaneMasks), - RegClassBegin(RCB), RegClassEnd(RCE) { + RegClassBegin(RCB), RegClassEnd(RCE), + CoveringLanes(SRICoveringLanes) { } TargetRegisterInfo::~TargetRegisterInfo() {} @@ -71,6 +73,14 @@ void PrintRegUnit::print(raw_ostream &OS) const { OS << '~' << TRI->getName(*Roots); } +void PrintVRegOrUnit::print(raw_ostream &OS) const { + if (TRI && TRI->isVirtualRegister(Unit)) { + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + return; + } + PrintRegUnit::print(OS); +} + /// getAllocatableClass - Return the maximal subclass of the given register /// class that is alloctable, or NULL. const TargetRegisterClass * @@ -83,7 +93,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { Base < BaseE; Base += 32) { unsigned Idx = Base; for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) { - unsigned Offset = CountTrailingZeros_32(Mask); + unsigned Offset = countTrailingZeros(Mask); const TargetRegisterClass *SubRC = getRegClass(Idx + Offset); if (SubRC->isAllocatable()) return SubRC; @@ -153,7 +163,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A, const TargetRegisterInfo *TRI) { for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) if (unsigned Common = *A++ & *B++) - return TRI->getRegClass(I + CountTrailingZeros_32(Common)); + return TRI->getRegClass(I + countTrailingZeros(Common)); return 0; } diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index 1bf14db..b0f2ca6 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -93,33 +93,10 @@ unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, // effectively means infinite latency. Since users of the TargetSchedule API // don't know how to handle this, we convert it to a very large latency that is // easy to distinguish when debugging the DAG but won't induce overflow. -static unsigned convertLatency(int Cycles) { +static unsigned capLatency(int Cycles) { return Cycles >= 0 ? Cycles : 1000; } -/// If we can determine the operand latency from the def only, without machine -/// model or itinerary lookup, do so. Otherwise return -1. -int TargetSchedModel::getDefLatency(const MachineInstr *DefMI, - bool FindMin) const { - - // Return a latency based on the itinerary properties and defining instruction - // if possible. Some common subtargets don't require per-operand latency, - // especially for minimum latencies. - if (FindMin) { - // If MinLatency is invalid, then use the itinerary for MinLatency. If no - // itinerary exists either, then use single cycle latency. - if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) { - return 1; - } - return SchedModel.MinLatency; - } - else if (!hasInstrSchedModel() && !hasInstrItineraries()) { - return TII->defaultDefLatency(&SchedModel, DefMI); - } - // ...operand lookup required - return -1; -} - /// Return the MCSchedClassDesc for this instruction. Some SchedClasses require /// evaluation of predicates that depend on instruction operands or flags. const MCSchedClassDesc *TargetSchedModel:: @@ -177,18 +154,16 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { // Top-level API for clients that know the operand indices. unsigned TargetSchedModel::computeOperandLatency( const MachineInstr *DefMI, unsigned DefOperIdx, - const MachineInstr *UseMI, unsigned UseOperIdx, - bool FindMin) const { + const MachineInstr *UseMI, unsigned UseOperIdx) const { - int DefLatency = getDefLatency(DefMI, FindMin); - if (DefLatency >= 0) - return DefLatency; + if (!hasInstrSchedModel() && !hasInstrItineraries()) + return TII->defaultDefLatency(&SchedModel, DefMI); if (hasInstrItineraries()) { int OperLatency = 0; if (UseMI) { - OperLatency = - TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx); + OperLatency = TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, + UseMI, UseOperIdx); } else { unsigned DefClass = DefMI->getDesc().getSchedClass(); @@ -205,13 +180,11 @@ unsigned TargetSchedModel::computeOperandLatency( // hook to allow subtargets to specialize latency. This hook is only // applicable to the InstrItins model. InstrSchedModel should model all // special cases without TII hooks. - if (!FindMin) - InstrLatency = std::max(InstrLatency, - TII->defaultDefLatency(&SchedModel, DefMI)); + InstrLatency = std::max(InstrLatency, + TII->defaultDefLatency(&SchedModel, DefMI)); return InstrLatency; } - assert(!FindMin && hasInstrSchedModel() && - "Expected a SchedModel for this cpu"); + // hasInstrSchedModel() const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); if (DefIdx < SCDesc->NumWriteLatencyEntries) { @@ -219,7 +192,7 @@ unsigned TargetSchedModel::computeOperandLatency( const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx); unsigned WriteID = WLEntry->WriteResourceID; - unsigned Latency = convertLatency(WLEntry->Cycles); + unsigned Latency = capLatency(WLEntry->Cycles); if (!UseMI) return Latency; @@ -228,13 +201,17 @@ unsigned TargetSchedModel::computeOperandLatency( if (UseDesc->NumReadAdvanceEntries == 0) return Latency; unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); - return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap + return 0; + return Latency - Advance; } // If DefIdx does not exist in the model (e.g. implicit defs), then return // unit latency (defaultDefLatency may be too conservative). #ifndef NDEBUG if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() - && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) { + && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef() + && SchedModel.isComplete()) { std::string Err; raw_string_ostream ss(Err); ss << "DefIdx " << DefIdx << " exceeds machine model writes for " @@ -248,10 +225,13 @@ unsigned TargetSchedModel::computeOperandLatency( return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); } -unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { +unsigned +TargetSchedModel::computeInstrLatency(const MachineInstr *MI, + bool UseDefaultDefLatency) const { // For the itinerary model, fall back to the old subtarget hook. // Allow subtargets to compute Bundle latencies outside the machine model. - if (hasInstrItineraries() || MI->isBundle()) + if (hasInstrItineraries() || MI->isBundle() || + (!hasInstrSchedModel() && !UseDefaultDefLatency)) return TII->getInstrLatency(&InstrItins, MI); if (hasInstrSchedModel()) { @@ -263,7 +243,7 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { // Lookup the definition's write latency in SubtargetInfo. const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx); - Latency = std::max(Latency, convertLatency(WLEntry->Cycles)); + Latency = std::max(Latency, capLatency(WLEntry->Cycles)); } return Latency; } @@ -274,13 +254,10 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { unsigned TargetSchedModel:: computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const { - // MinLatency == -1 is for in-order processors that always have unit - // MinLatency. MinLatency > 0 is for in-order processors with varying min - // latencies, but since this is not a RAW dep, we always use unit latency. - if (SchedModel.MinLatency != 0) + if (SchedModel.MicroOpBufferSize <= 1) return 1; - // MinLatency == 0 indicates an out-of-order processor that can dispatch + // MicroOpBufferSize > 1 indicates an out-of-order processor that can dispatch // WAW dependencies in the same cycle. // Treat predication as a data dependency for out-of-order cpus. In-order @@ -302,7 +279,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, if (SCDesc->isValid()) { for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc), *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) { - if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered) + if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize) return 1; } } diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 7ca2bee..b9a6b47 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1400,7 +1400,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); SlotIndex endIdx = LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber); - LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI)); + LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI)); } } @@ -1457,7 +1457,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber); if (I->end == UseIdx) - LI.removeRange(LastCopyIdx, UseIdx); + LI.removeSegment(LastCopyIdx, UseIdx); } } else if (RemovedKillFlag) { @@ -1539,7 +1539,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // transformations that may either eliminate the tied operands or // improve the opportunities for coalescing away the register copy. if (TiedOperands.size() == 1) { - SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs + SmallVectorImpl<std::pair<unsigned, unsigned> > &TiedPairs = TiedOperands.begin()->second; if (TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first; diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp index a95ebcd..f735ef2 100644 --- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ProfileInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -50,7 +49,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); - AU.addPreserved<ProfileInfo>(); } }; } @@ -87,9 +85,7 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { } // Actually remove the blocks now. - ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>(); for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { - if (PI) PI->removeBlock(DeadBlocks[i]); DeadBlocks[i]->eraseFromParent(); } diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index cd012d2..e0aa405 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -264,15 +265,36 @@ void VirtRegRewriter::rewrite() { SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; + SmallPtrSet<const MachineInstr *, 4> NoReturnInsts; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { DEBUG(MBBI->print(dbgs(), Indexes)); + bool IsExitBB = MBBI->succ_empty(); for (MachineBasicBlock::instr_iterator MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { MachineInstr *MI = MII; ++MII; + // Check if this instruction is a call to a noreturn function. + // If so, all the definitions set by this instruction can be ignored. + if (IsExitBB && MI->isCall()) + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + MachineOperand &MO = *MOI; + if (!MO.isGlobal()) + continue; + const Function *Func = dyn_cast<Function>(MO.getGlobal()); + if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) || + // We need to keep correct unwind information + // even if the function will not return, since the + // runtime may need it. + !Func->hasFnAttribute(Attribute::NoUnwind)) + continue; + NoReturnInsts.insert(MI); + break; + } + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { MachineOperand &MO = *MOI; @@ -353,7 +375,25 @@ void VirtRegRewriter::rewrite() { } // Tell MRI about physical registers in use. - for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) - if (!MRI->reg_nodbg_empty(Reg)) - MRI->setPhysRegUsed(Reg); + if (NoReturnInsts.empty()) { + for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) + if (!MRI->reg_nodbg_empty(Reg)) + MRI->setPhysRegUsed(Reg); + } else { + for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) { + if (MRI->reg_nodbg_empty(Reg)) + continue; + // Check if this register has a use that will impact the rest of the + // code. Uses in debug and noreturn instructions do not impact the + // generated code. + for (MachineRegisterInfo::reg_nodbg_iterator It = + MRI->reg_nodbg_begin(Reg), + EndIt = MRI->reg_nodbg_end(); It != EndIt; ++It) { + if (!NoReturnInsts.count(&(*It))) { + MRI->setPhysRegUsed(Reg); + break; + } + } + } + } } |