diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen')
123 files changed, 7267 insertions, 3363 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 205480a..7a1c049 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -635,7 +635,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( --R; const unsigned NewSuperReg = Order[R]; // Don't consider non-allocatable registers - if (!RegClassInfo.isAllocatable(NewSuperReg)) continue; + if (!MRI.isAllocatable(NewSuperReg)) continue; // Don't replace a register with itself. if (NewSuperReg == SuperReg) continue; @@ -818,7 +818,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!RegClassInfo.isAllocatable(AntiDepReg)) { + if (!MRI.isAllocatable(AntiDepReg)) { // Don't break anti-dependencies on non-allocatable registers. DEBUG(dbgs() << " (non-allocatable)\n"); continue; diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp index 32ad34a..7cde136 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp @@ -29,6 +29,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg); std::pair<unsigned, unsigned> HintPair = VRM.getRegInfo().getRegAllocationHint(VirtReg); + const MachineRegisterInfo &MRI = VRM.getRegInfo(); // HintPair.second is a register, phys or virt. Hint = HintPair.second; @@ -52,7 +53,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, unsigned *P = new unsigned[Order.size()]; Begin = P; for (unsigned i = 0; i != Order.size(); ++i) - if (!RCI.isReserved(Order[i])) + if (!MRI.isReserved(Order[i])) *P++ = Order[i]; End = P; @@ -69,7 +70,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, // The hint must be a valid physreg for allocation. 
if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || RCI.isReserved(Hint))) + !RC->contains(Hint) || MRI.isReserved(Hint))) Hint = 0; } diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 447f398..5162ad7 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -21,7 +21,7 @@ #include "llvm/Module.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/ErrorHandling.h" @@ -79,7 +79,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty, uint64_t StartingOffset) { // Given a struct type, recursively traverse the elements. if (StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); + const StructLayout *SL = TLI.getDataLayout()->getStructLayout(STy); for (StructType::element_iterator EB = STy->element_begin(), EI = EB, EE = STy->element_end(); @@ -91,7 +91,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty, // Given an array type, recursively traverse the elements. if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Type *EltTy = ATy->getElementType(); - uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); + uint64_t EltSize = TLI.getDataLayout()->getTypeAllocSize(EltTy); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, StartingOffset + i * EltSize); @@ -314,11 +314,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // the return. Ignore noalias because it doesn't affect the call sequence. 
const Function *F = ExitBB->getParent(); Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); - if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) + if (AttrBuilder(CalleeRetAttr).removeAttribute(Attributes::NoAlias) != + AttrBuilder(CallerRetAttr).removeAttribute(Attributes::NoAlias)) return false; // It's not safe to eliminate the sign / zero extension of the return value. - if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + if (CallerRetAttr.hasAttribute(Attributes::ZExt) || + CallerRetAttr.hasAttribute(Attributes::SExt)) return false; // Otherwise, make sure the unmodified return value of I is the return value. @@ -354,11 +356,13 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); - if (CallerRetAttr & ~Attribute::NoAlias) + if (AttrBuilder(CallerRetAttr) + .removeAttribute(Attributes::NoAlias).hasAttributes()) return false; // It's not safe to eliminate the sign / zero extension of the return value. - if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + if (CallerRetAttr.hasAttribute(Attributes::ZExt) || + CallerRetAttr.hasAttribute(Attributes::SExt)) return false; // Check if the only use is a function return node. 
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index bf5d8c4..b2ebf04 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -24,7 +24,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d9be7a1..d74a703 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -33,7 +33,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -67,7 +67,7 @@ static gcp_map_type &getGCMap(void *&P) { /// getGVAlignmentLog2 - Return the alignment to use for the specified global /// value in log2 form. This rounds up to the preferred alignment if possible /// and legal. -static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD, +static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, unsigned InBits = 0) { unsigned NumBits = 0; if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) @@ -131,9 +131,9 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { } -/// getTargetData - Return information about data layout. -const TargetData &AsmPrinter::getTargetData() const { - return *TM.getTargetData(); +/// getDataLayout - Return information about data layout. 
+const DataLayout &AsmPrinter::getDataLayout() const { + return *TM.getDataLayout(); } /// getCurrentSection() - Return the current section we are emitting to. @@ -160,7 +160,7 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); - Mang = new Mangler(OutContext, *TM.getTargetData()); + Mang = new Mangler(OutContext, *TM.getDataLayout()); // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); @@ -213,16 +213,16 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { case GlobalValue::CommonLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: if (MAI->getWeakDefDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); if ((GlobalValue::LinkageTypes)Linkage != - GlobalValue::LinkerPrivateWeakDefAutoLinkage) + GlobalValue::LinkOnceODRAutoHideLinkage) // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else @@ -280,7 +280,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. 
Overaligning a global @@ -312,8 +312,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; } - if (MAI->getLCOMMDirectiveType() != LCOMM::None && - (MAI->getLCOMMDirectiveType() != LCOMM::NoAlignment || Align == 1)) { + if (Align == 1 || + MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { // .lcomm _foo, 42 OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align); return; @@ -482,9 +482,8 @@ void AsmPrinter::EmitFunctionEntryLabel() { "' label emitted multiple times to assembly file"); } - -/// EmitComments - Pretty-print comments for instructions. -static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { +/// emitComments - Pretty-print comments for instructions. +static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getParent()->getParent(); const TargetMachine &TM = MF->getTarget(); @@ -519,16 +518,16 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { CommentOS << " Reload Reuse\n"; } -/// EmitImplicitDef - This method emits the specified machine instruction +/// emitImplicitDef - This method emits the specified machine instruction /// that is an implicit def. 
-static void EmitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) { +static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) { unsigned RegNo = MI->getOperand(0).getReg(); AP.OutStreamer.AddComment(Twine("implicit-def: ") + AP.TM.getRegisterInfo()->getName(RegNo)); AP.OutStreamer.AddBlankLine(); } -static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) { +static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { std::string Str = "kill:"; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &Op = MI->getOperand(i); @@ -541,10 +540,10 @@ static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) { AP.OutStreamer.AddBlankLine(); } -/// EmitDebugValueComment - This method handles the target-independent form +/// emitDebugValueComment - This method handles the target-independent form /// of DBG_VALUE, returning true if it was able to do so. A false return /// means the target will need to handle MI in EmitInstruction. -static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { +static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // This code handles only the 3-operand target-independent form. 
if (MI->getNumOperands() != 3) return false; @@ -674,7 +673,7 @@ void AsmPrinter::EmitFunctionBody() { } if (isVerbose()) - EmitComments(*II, OutStreamer.GetCommentOS()); + emitComments(*II, OutStreamer.GetCommentOS()); switch (II->getOpcode()) { case TargetOpcode::PROLOG_LABEL: @@ -690,15 +689,15 @@ void AsmPrinter::EmitFunctionBody() { break; case TargetOpcode::DBG_VALUE: if (isVerbose()) { - if (!EmitDebugValueComment(II, *this)) + if (!emitDebugValueComment(II, *this)) EmitInstruction(II); } break; case TargetOpcode::IMPLICIT_DEF: - if (isVerbose()) EmitImplicitDef(II, *this); + if (isVerbose()) emitImplicitDef(II, *this); break; case TargetOpcode::KILL: - if (isVerbose()) EmitKill(II, *this); + if (isVerbose()) emitKill(II, *this); break; default: if (!TM.hasMCUseLoc()) @@ -992,7 +991,7 @@ void AsmPrinter::EmitConstantPool() { Kind = SectionKind::getReadOnlyWithRelLocal(); break; case 0: - switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { + switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) { case 4: Kind = SectionKind::getMergeableConst4(); break; case 8: Kind = SectionKind::getMergeableConst8(); break; case 16: Kind = SectionKind::getMergeableConst16();break; @@ -1038,7 +1037,7 @@ void AsmPrinter::EmitConstantPool() { OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/); Type *Ty = CPE.getType(); - Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty); + Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty); OutStreamer.EmitLabel(GetCPISymbol(CPI)); if (CPE.isMachineConstantPoolEntry()) @@ -1081,7 +1080,12 @@ void AsmPrinter::EmitJumpTableInfo() { JTInDiffSection = true; } - EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData()))); + EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout()))); + + // Jump tables in code sections are marked with a data_region directive + // where that's supported. 
+ if (!JTInDiffSection) + OutStreamer.EmitDataRegion(MCDR_DataRegionJT32); for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; @@ -1123,6 +1127,8 @@ void AsmPrinter::EmitJumpTableInfo() { for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); } + if (!JTInDiffSection) + OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); } /// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the @@ -1190,7 +1196,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); - unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData()); + unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout()); OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0); } @@ -1292,7 +1298,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); unsigned Align = Log2_32(TD->getPointerPrefAlignment()); std::stable_sort(Structors.begin(), Structors.end(), priority_order); for (unsigned i = 0, e = Structors.size(); i != e; ++i) { @@ -1408,7 +1414,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // if required for correctness. // void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { - if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits); + if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits); if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. @@ -1422,9 +1428,9 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { // Constant emission. //===----------------------------------------------------------------------===// -/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 
+/// lowerConstant - Lower the specified LLVM Constant to an MCExpr. /// -static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { +static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { MCContext &Ctx = AP.OutContext; if (CV->isNullValue() || isa<UndefValue>(CV)) @@ -1447,12 +1453,12 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding - // opportunities. Attempt to fold the expression using TargetData as a + // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. if (Constant *C = - ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) + ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) if (C != CE) - return LowerConstant(C, AP); + return lowerConstant(C, AP); // Otherwise report the problem to the user. { @@ -1464,21 +1470,20 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Generate a symbolic expression for the byte address const Constant *PtrVal = CE->getOperand(0); SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec); - const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); + const MCExpr *Base = lowerConstant(CE->getOperand(0), AP); if (Offset == 0) return Base; // Truncate/sext the offset to the pointer size. 
- if (TD.getPointerSizeInBits() != 64) { - int SExtAmount = 64-TD.getPointerSizeInBits(); - Offset = (Offset << SExtAmount) >> SExtAmount; - } + unsigned Width = TD.getPointerSizeInBits(); + if (Width < 64) + Offset = SignExtend64(Offset, Width); return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), Ctx); @@ -1491,26 +1496,26 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { // is reasonable to treat their delta as a 32-bit value. // FALL THROUGH. case Instruction::BitCast: - return LowerConstant(CE->getOperand(0), AP); + return lowerConstant(CE->getOperand(0), AP); case Instruction::IntToPtr: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), false/*ZExt*/); - return LowerConstant(Op, AP); + return lowerConstant(Op, AP); } case Instruction::PtrToInt: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. Constant *Op = CE->getOperand(0); Type *Ty = CE->getType(); - const MCExpr *OpExpr = LowerConstant(Op, AP); + const MCExpr *OpExpr = lowerConstant(Op, AP); // We can emit the pointer value into this slot if the slot is an // integer slot equal to the size of the pointer. 
@@ -1536,8 +1541,8 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); - const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); + const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP); + const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP); switch (CE->getOpcode()) { default: llvm_unreachable("Unknown binary operator constant cast expr"); case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); @@ -1554,7 +1559,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { } } -static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, +static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, AsmPrinter &AP); /// isRepeatedByteSequence - Determine whether the given value is @@ -1578,7 +1583,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) return -1; - uint64_t Size = TM.getTargetData()->getTypeAllocSize(V->getType()); + uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType()); uint64_t Value = CI->getZExtValue(); // Make sure the constant is at least 8 bits long and has a power @@ -1616,13 +1621,13 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { return -1; } -static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, +static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, unsigned AddrSpace,AsmPrinter &AP){ // See if we can aggregate this into a .fill, if so, emit it as such. int Value = isRepeatedByteSequence(CDS, AP.TM); if (Value != -1) { - uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType()); + uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType()); // Don't emit a 1-byte object as a .fill. 
if (Bytes > 1) return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); @@ -1672,7 +1677,7 @@ static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } } - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); unsigned Size = TD.getTypeAllocSize(CDS->getType()); unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); @@ -1681,28 +1686,28 @@ static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } -static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, +static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, AsmPrinter &AP) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. int Value = isRepeatedByteSequence(CA, AP.TM); if (Value != -1) { - uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType()); + uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType()); AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); } else { for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); } } -static void EmitGlobalConstantVector(const ConstantVector *CV, +static void emitGlobalConstantVector(const ConstantVector *CV, unsigned AddrSpace, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); unsigned Size = TD.getTypeAllocSize(CV->getType()); unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); @@ -1710,10 +1715,10 @@ static void EmitGlobalConstantVector(const ConstantVector 
*CV, AP.OutStreamer.EmitZeros(Padding, AddrSpace); } -static void EmitGlobalConstantStruct(const ConstantStruct *CS, +static void emitGlobalConstantStruct(const ConstantStruct *CS, unsigned AddrSpace, AsmPrinter &AP) { // Print the fields in successive locations. Pad to align if needed! - const TargetData *TD = AP.TM.getTargetData(); + const DataLayout *TD = AP.TM.getDataLayout(); unsigned Size = TD->getTypeAllocSize(CS->getType()); const StructLayout *Layout = TD->getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; @@ -1727,7 +1732,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, SizeSoFar += FieldSize + PadSize; // Now print the actual field value. - EmitGlobalConstantImpl(Field, AddrSpace, AP); + emitGlobalConstantImpl(Field, AddrSpace, AP); // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well @@ -1738,7 +1743,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, "Layout of constant struct may be incorrect!"); } -static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, +static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AsmPrinter &AP) { if (CFP->getType()->isHalfTy()) { if (AP.isVerbose()) { @@ -1793,7 +1798,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, << DoubleVal.convertToDouble() << '\n'; } - if (AP.TM.getTargetData()->isBigEndian()) { + if (AP.TM.getDataLayout()->isBigEndian()) { AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); } else { @@ -1802,7 +1807,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, } // Emit the tail padding for the long double. 
- const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - TD.getTypeStoreSize(CFP->getType()), AddrSpace); return; @@ -1814,7 +1819,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, // API needed to prevent premature destruction. APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); - if (AP.TM.getTargetData()->isBigEndian()) { + if (AP.TM.getDataLayout()->isBigEndian()) { AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace); } else { @@ -1823,9 +1828,9 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, } } -static void EmitGlobalConstantLargeInt(const ConstantInt *CI, +static void emitGlobalConstantLargeInt(const ConstantInt *CI, unsigned AddrSpace, AsmPrinter &AP) { - const TargetData *TD = AP.TM.getTargetData(); + const DataLayout *TD = AP.TM.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); assert((BitWidth & 63) == 0 && "only support multiples of 64-bits"); @@ -1839,9 +1844,9 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, } } -static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, +static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AsmPrinter &AP) { - const TargetData *TD = AP.TM.getTargetData(); + const DataLayout *TD = AP.TM.getDataLayout(); uint64_t Size = TD->getTypeAllocSize(CV->getType()); if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) return AP.OutStreamer.EmitZeros(Size, AddrSpace); @@ -1858,13 +1863,13 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); return; default: - EmitGlobalConstantLargeInt(CI, AddrSpace, AP); + emitGlobalConstantLargeInt(CI, AddrSpace, AP); return; } } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) - return 
EmitGlobalConstantFP(CFP, AddrSpace, AP); + return emitGlobalConstantFP(CFP, AddrSpace, AP); if (isa<ConstantPointerNull>(CV)) { AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); @@ -1872,19 +1877,19 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, } if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) - return EmitGlobalConstantDataSequential(CDS, AddrSpace, AP); + return emitGlobalConstantDataSequential(CDS, AddrSpace, AP); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return EmitGlobalConstantArray(CVA, AddrSpace, AP); + return emitGlobalConstantArray(CVA, AddrSpace, AP); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return EmitGlobalConstantStruct(CVS, AddrSpace, AP); + return emitGlobalConstantStruct(CVS, AddrSpace, AP); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of // vectors). if (CE->getOpcode() == Instruction::BitCast) - return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); if (Size > 8) { // If the constant expression's size is greater than 64-bits, then we have @@ -1892,23 +1897,23 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, // that way. Constant *New = ConstantFoldConstantExpression(CE, TD); if (New && New != CE) - return EmitGlobalConstantImpl(New, AddrSpace, AP); + return emitGlobalConstantImpl(New, AddrSpace, AP); } } if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return EmitGlobalConstantVector(V, AddrSpace, AP); + return emitGlobalConstantVector(V, AddrSpace, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. 
- AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace); + AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { - uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); if (Size) - EmitGlobalConstantImpl(CV, AddrSpace, *this); + emitGlobalConstantImpl(CV, AddrSpace, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. @@ -2023,8 +2028,8 @@ static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, } } -/// EmitBasicBlockLoopComments - Pretty-print comments for basic blocks. -static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, +/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks. +static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, const MachineLoopInfo *LI, const AsmPrinter &AP) { // Add loop depth information @@ -2090,7 +2095,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { if (const BasicBlock *BB = MBB->getBasicBlock()) if (BB->hasName()) OutStreamer.AddComment("%" + BB->getName()); - EmitBasicBlockLoopComments(*MBB, LI, *this); + emitBasicBlockLoopComments(*MBB, LI, *this); } // Print the main label for the block. 
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 90d511c..d94e1fe 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -18,7 +18,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -112,7 +112,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { switch (Encoding & 0x07) { default: llvm_unreachable("Invalid encoded value."); - case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize(); + case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: return 4; case dwarf::DW_EH_PE_udata8: return 8; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index db43b06..50f0fc3 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -43,10 +43,10 @@ namespace { }; } -/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an +/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an /// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo /// struct above. 
-static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { +static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo); assert(DiagInfo && "Diagnostic context not passed down?"); @@ -68,7 +68,8 @@ static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { } /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. -void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { +void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, + InlineAsm::AsmDialect Dialect) const { assert(!Str.empty() && "Can't emit empty inline asm block"); // Remember if the buffer is nul terminated or not so we can avoid a copy. @@ -91,12 +92,12 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { LLVMContext &LLVMCtx = MMI->getModule()->getContext(); bool HasDiagHandler = false; if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) { - // If the source manager has an issue, we arrange for SrcMgrDiagHandler + // If the source manager has an issue, we arrange for srcMgrDiagHandler // to be invoked, getting DiagInfo passed into it. DiagInfo.LocInfo = LocMDNode; DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); - SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo); + SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo); HasDiagHandler = true; } @@ -126,6 +127,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); + Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); // Don't implicitly switch to the text section before the asm. 
@@ -135,71 +137,113 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { report_fatal_error("Error parsing inline asm\n"); } +static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, int InlineAsmVariant, + AsmPrinter *AP, unsigned LocCookie, + raw_ostream &OS) { + // Switch to the inline assembly variant. + OS << "\t.intel_syntax\n\t"; -/// EmitInlineAsm - This method formats and emits the specified machine -/// instruction that is an inline asm. -void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { - assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); - + const char *LastEmitted = AsmStr; // One past the last character emitted. unsigned NumOperands = MI->getNumOperands(); - // Count the number of register definitions to find the asm string. - unsigned NumDefs = 0; - for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); - ++NumDefs) - assert(NumDefs != NumOperands-2 && "No asm string?"); + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. + const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; - assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + OS.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + OS << '\n'; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; - // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. - const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': + ++LastEmitted; // Consume second '$' character. 
+ break; + } + if (Done) break; - // If this asmstr is empty, just print the #APP/#NOAPP markers. - // These are useful to see where empty asm's wound up. - if (AsmStr[0] == 0) { - // Don't emit the comments if writing to a .o file. - if (!OutStreamer.hasRawTextSupport()) return; + const char *IDStart = LastEmitted; + const char *IDEnd = IDStart; + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmStart()); - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmEnd()); - return; - } + unsigned Val; + if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + LastEmitted = IDEnd; - // Emit the #APP start marker. This has to happen even if verbose-asm isn't - // enabled, so we use EmitRawText. - if (OutStreamer.hasRawTextSupport()) - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmStart()); + if (Val >= NumOperands-1) + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); - // Get the !srcloc metadata node if we have it, and decode the loc cookie from - // it. - unsigned LocCookie = 0; - const MDNode *LocMD = 0; - for (unsigned i = MI->getNumOperands(); i != 0; --i) { - if (MI->getOperand(i-1).isMetadata() && - (LocMD = MI->getOperand(i-1).getMetadata()) && - LocMD->getNumOperands() != 0) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { - LocCookie = CI->getZExtValue(); - break; - } - } - } + // Okay, we finally have a value number. Ask the target to print this + // operand! + unsigned OpNo = InlineAsm::MIOp_FirstOperand; - // Emit the inline asm to a temporary string so we can emit it through - // EmitInlineAsm. 
- SmallString<256> StringData; - raw_svector_ostream OS(StringData); + bool Error = false; - OS << '\t'; + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } - // The variant of the current asmprinter. - int AsmPrinterVariant = MAI->getAssemblerDialect(); + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. + + if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, + /*Modifier*/ 0, OS); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, + /*Modifier*/ 0, OS); + } + } + if (Error) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); + } + break; + } + } + } + OS << "\n\t.att_syntax\n" << (char)0; // null terminate string. +} +static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, int InlineAsmVariant, + int AsmPrinterVariant, AsmPrinter *AP, + unsigned LocCookie, raw_ostream &OS) { int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. 
+ unsigned NumOperands = MI->getNumOperands(); + + OS << '\t'; while (*LastEmitted) { switch (*LastEmitted) { @@ -272,7 +316,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { " string: '" + Twine(AsmStr) + "'"); std::string Val(StrStart, StrEnd); - PrintSpecial(MI, OS, Val.c_str()); + AP->PrintSpecial(MI, OS, Val.c_str()); LastEmitted = StrEnd+1; break; } @@ -340,13 +384,12 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // FIXME: What if the operand isn't an MBB, report error? OS << *MI->getOperand(OpNo).getMBB()->getSymbol(); else { - AsmPrinter *AP = const_cast<AsmPrinter*>(this); if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant, + Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, Modifier[0] ? Modifier : 0, OS); } else { - Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant, + Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, Modifier[0] ? Modifier : 0, OS); } } @@ -363,7 +406,74 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { } } OS << '\n' << (char)0; // null terminate string. - EmitInlineAsm(OS.str(), LocMD); +} + +/// EmitInlineAsm - This method formats and emits the specified machine +/// instruction that is an inline asm. +void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { + assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + + // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + + // If this asmstr is empty, just print the #APP/#NOAPP markers. 
+ // These are useful to see where empty asm's wound up. + if (AsmStr[0] == 0) { + // Don't emit the comments if writing to a .o file. + if (!OutStreamer.hasRawTextSupport()) return; + + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmStart()); + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmEnd()); + return; + } + + // Emit the #APP start marker. This has to happen even if verbose-asm isn't + // enabled, so we use EmitRawText. + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmStart()); + + // Get the !srcloc metadata node if we have it, and decode the loc cookie from + // it. + unsigned LocCookie = 0; + const MDNode *LocMD = 0; + for (unsigned i = MI->getNumOperands(); i != 0; --i) { + if (MI->getOperand(i-1).isMetadata() && + (LocMD = MI->getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + } + + // Emit the inline asm to a temporary string so we can emit it through + // EmitInlineAsm. + SmallString<256> StringData; + raw_svector_ostream OS(StringData); + + // The variant of the current asmprinter. + int AsmPrinterVariant = MAI->getAssemblerDialect(); + InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect(); + AsmPrinter *AP = const_cast<AsmPrinter*>(this); + if (InlineAsmVariant == InlineAsm::AD_ATT) + EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant, + AP, LocCookie, OS); + else + EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS); + + EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use EmitRawText. 
@@ -409,8 +519,8 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, /// instruction, using the specified assembler variant. Targets should /// override this to format as appropriate. bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) { + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) { // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) return true; // Unknown modifier. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 3776848..4d73b3c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -17,7 +17,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -182,6 +182,12 @@ void DIEValue::dump() { void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { unsigned Size = ~0U; switch (Form) { + case dwarf::DW_FORM_flag_present: + // Emit something to keep the lines and comments in sync. + // FIXME: Is there a better way to do this? 
+ if (Asm->OutStreamer.hasRawTextSupport()) + Asm->OutStreamer.EmitRawText(StringRef("")); + return; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru case dwarf::DW_FORM_data1: Size = 1; break; @@ -193,7 +199,8 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { case dwarf::DW_FORM_data8: Size = 8; break; case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; - case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break; + case dwarf::DW_FORM_addr: + Size = Asm->getDataLayout().getPointerSize(); break; default: llvm_unreachable("DIE Value form not supported yet"); } Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/); @@ -203,6 +210,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { /// unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { switch (Form) { + case dwarf::DW_FORM_flag_present: return 0; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru case dwarf::DW_FORM_data1: return sizeof(int8_t); @@ -214,7 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_data8: return sizeof(int64_t); case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); - case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize(); + case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); default: llvm_unreachable("DIE Value form not supported yet"); } } @@ -241,7 +249,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getTargetData().getPointerSize(); + return AP->getDataLayout().getPointerSize(); } #ifndef NDEBUG @@ -265,7 +273,7 @@ void 
DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getTargetData().getPointerSize(); + return AP->getDataLayout().getPointerSize(); } #ifndef NDEBUG diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h index f93ea1b..28a96f3 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h @@ -214,9 +214,6 @@ namespace llvm { /// virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *) { return true; } - #ifndef NDEBUG virtual void print(raw_ostream &O) = 0; void dump(); @@ -257,7 +254,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. - static bool classof(const DIEInteger *) { return true; } static bool classof(const DIEValue *I) { return I->getType() == isInteger; } #ifndef NDEBUG @@ -286,7 +282,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. - static bool classof(const DIELabel *) { return true; } static bool classof(const DIEValue *L) { return L->getType() == isLabel; } #ifndef NDEBUG @@ -313,7 +308,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. - static bool classof(const DIEDelta *) { return true; } static bool classof(const DIEValue *D) { return D->getType() == isDelta; } #ifndef NDEBUG @@ -343,7 +337,6 @@ namespace llvm { } // Implement isa/cast/dyncast. - static bool classof(const DIEEntry *) { return true; } static bool classof(const DIEValue *E) { return E->getType() == isEntry; } #ifndef NDEBUG @@ -383,7 +376,6 @@ namespace llvm { virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; // Implement isa/cast/dyncast. 
- static bool classof(const DIEBlock *) { return true; } static bool classof(const DIEValue *E) { return E->getType() == isBlock; } #ifndef NDEBUG diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 454a923..05e0f2f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -133,8 +133,8 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { } } -// Walk through and emit the buckets for the table. This will look -// like a list of numbers of how many elements are in each bucket. +// Walk through and emit the buckets for the table. Each index is +// an offset into the list of hashes. void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { unsigned index = 0; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 963b8cd..92d1bbe 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -237,8 +237,8 @@ private: #endif }; - DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT - void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT + DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; + void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; // Internal Functions void EmitHeader(AsmPrinter *); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index d975f1f..4fdd5ca 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -25,7 +25,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include 
"llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index d30e5bb..2b07dda 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -22,7 +22,7 @@ #include "llvm/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -51,6 +51,15 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { return Value; } +/// addFlag - Add a flag that is true. +void CompileUnit::addFlag(DIE *Die, unsigned Attribute) { + if (!DD->useDarwinGDBCompat()) + Die->addValue(Attribute, dwarf::DW_FORM_flag_present, + DIEIntegerOne); + else + addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1); +} + /// addUInt - Add an unsigned integer attribute data and value. /// void CompileUnit::addUInt(DIE *Die, unsigned Attribute, @@ -501,7 +510,7 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { const char *FltPtr = (const char*)FltVal.getRawData(); int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); int Incr = (LittleEndian ? 1 : -1); int Start = (LittleEndian ? 0 : NumBytes - 1); int Stop = (LittleEndian ? NumBytes : -1); @@ -543,7 +552,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, const uint64_t *Ptr64 = Val.getRawData(); int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. 
- bool LittleEndian = Asm->getTargetData().isLittleEndian(); + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); // Output the constant to DWARF one byte at a time. for (int i = 0; i < NumBytes; i++) { @@ -794,7 +803,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) - addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_prototyped); } break; case dwarf::DW_TAG_structure_type: @@ -825,15 +834,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (SP.isExplicit()) - addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); + addFlag(ElemDie, dwarf::DW_AT_explicit); } else if (Element.isVariable()) { DIVariable DV(Element); ElemDie = new DIE(dwarf::DW_TAG_variable); addString(ElemDie, dwarf::DW_AT_name, DV.getName()); addType(ElemDie, DV.getType()); - addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(ElemDie, dwarf::DW_AT_declaration); + addFlag(ElemDie, dwarf::DW_AT_external); addSourceLine(ElemDie, DV); } else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); @@ -883,7 +892,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } if (CTy.isAppleBlockExtension()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_APPLE_block); DICompositeType ContainingType = CTy.getContainingType(); if (DIDescriptor(ContainingType).isCompositeType()) @@ -895,8 +904,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } if (CTy.isObjcClassComplete()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type, - dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, 
dwarf::DW_AT_APPLE_objc_complete_type); // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. @@ -929,7 +937,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // If we're a forward decl, say so. if (CTy.isForwardDecl()) - addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_declaration); // Add source line info if available. if (!CTy.isForwardDecl()) @@ -1028,8 +1036,10 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // AT_specification code in order to work around a bug in older // gdbs that requires the linkage name to resolve multiple template // functions. + // TODO: Remove this set of code when we get rid of the old gdb + // compatibility. StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty()) + if (!LinkageName.empty() && DD->useDarwinGDBCompat()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, getRealLinkageName(LinkageName)); @@ -1043,6 +1053,11 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { return SPDie; } + // Add the linkage name if we have one. + if (!LinkageName.empty() && !DD->useDarwinGDBCompat()) + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); + // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) addString(SPDie, dwarf::DW_AT_name, SP.getName()); @@ -1055,7 +1070,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) - addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_prototyped); // Add Return Type. 
DICompositeType SPTy = SP.getType(); @@ -1079,7 +1094,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { } if (!SP.isDefinition()) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. @@ -1090,22 +1105,22 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i))); + DIType ATy = DIType(Args.getElement(i)); addType(Arg, ATy); if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + addFlag(Arg, dwarf::DW_AT_artificial); SPDie->addChild(Arg); } } if (SP.isArtificial()) - addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_artificial); if (!SP.isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_external); if (SP.isOptimized()) - addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); if (unsigned isa = Asm->getISAEncoding()) { addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); @@ -1168,7 +1183,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { // Add scoping info. if (!GV.isLocalToUnit()) - addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(VariableDIE, dwarf::DW_AT_external); // Add line number info. 
addSourceLine(VariableDIE, GV); @@ -1193,8 +1208,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, - 1); + addFlag(VariableDIE, dwarf::DW_AT_declaration); addDie(VariableSpecDIE); } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); @@ -1213,7 +1227,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end()); addUInt(Block, 0, dwarf::DW_FORM_udata, - Asm->getTargetData().getIndexedOffset(Ptr->getType(), Idx)); + Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } @@ -1260,7 +1274,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType *CTy) { Buffer.setTag(dwarf::DW_TAG_array_type); if (CTy->getTag() == dwarf::DW_TAG_vector_type) - addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_GNU_vector); // Emit derived type. addType(&Buffer, CTy->getTypeDerivedFrom()); @@ -1333,8 +1347,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { } if (DV->isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, - dwarf::DW_FORM_flag, 1); + addFlag(VariableDie, dwarf::DW_AT_artificial); if (isScopeAbstract) { DV->setDIE(VariableDie); @@ -1446,7 +1459,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { Offset -= FieldOffset; // Maybe we need to work from the other end. 
- if (Asm->getTargetData().isLittleEndian()) + if (Asm->getDataLayout().isLittleEndian()) Offset = FieldSize - (Offset + Size); addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index b4ff9e8..fad9b6e 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -176,6 +176,9 @@ public: } public: + /// addFlag - Add a flag that is true to the DIE. + void addFlag(DIE *Die, unsigned Attribute); + /// addUInt - Add an unsigned integer attribute data and value. /// void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); @@ -280,8 +283,8 @@ public: /// for the given DITemplateTypeParameter. DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); - /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateValueParameter. + /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create + /// new DIE for the given DITemplateValueParameter. 
DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 649684a..367b523 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -27,7 +27,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -54,9 +54,29 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, cl::desc("Make an absence of debug location information explicit."), cl::init(false)); -static cl::opt<bool> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, +namespace { + enum DefaultOnOff { + Default, Enable, Disable + }; +} + +static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, cl::desc("Output prototype dwarf accelerator tables."), - cl::init(false)); + cl::values( + clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), + clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, + cl::desc("Compatibility with Darwin gdb."), + cl::values( + clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), + clEnumValEnd), + cl::init(Default)); namespace { const char *DWARFGroupName = "DWARF Emission"; @@ -135,10 +155,25 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - // Turn on accelerator tables for Darwin. 
- if (Triple(M->getTargetTriple()).isOSDarwin()) - DwarfAccelTables = true; - + // Turn on accelerator tables and older gdb compatibility + // for Darwin. + bool isDarwin = Triple(M->getTargetTriple()).isOSDarwin(); + if (DarwinGDBCompat == Default) { + if (isDarwin) + isDarwinGDBCompat = true; + else + isDarwinGDBCompat = false; + } else + isDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false; + + if (DwarfAccelTables == Default) { + if (isDarwin) + hasDwarfAccelTables = true; + else + hasDwarfAccelTables = false; + } else + hasDwarfAccelTables = DwarfAccelTables == Enable ? true : false; + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); @@ -272,44 +307,51 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); - DISubprogram SPDecl = SP.getFunctionDeclaration(); - if (!SPDecl.isSubprogram()) { - // There is not any need to generate specification DIE for a function - // defined at compile unit level. If a function is defined inside another - // function then gdb prefers the definition at top level and but does not - // expect specification DIE in parent function. So avoid creating - // specification DIE for a function defined inside a function. - if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext())) { - SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - - // Add arguments. 
- DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i))); - SPCU->addType(Arg, ATy); - if (ATy.isArtificial()) - SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); - SPDie->addChild(Arg); - } - DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPDeclDie); - SPCU->addDie(SPDie); - } - } - // Pick up abstract subprogram DIE. + // If we're updating an abstract DIE, then we will be adding the children and + // object pointer later on. But what we don't want to do is process the + // concrete DIE twice. if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { + // Pick up abstract subprogram DIE. SPDie = new DIE(dwarf::DW_TAG_subprogram); SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, AbsSPDIE); SPCU->addDie(SPDie); + } else { + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (!SPDecl.isSubprogram()) { + // There is not any need to generate specification DIE for a function + // defined at compile unit level. If a function is defined inside another + // function then gdb prefers the definition at top level and but does not + // expect specification DIE in parent function. So avoid creating + // specification DIE for a function defined inside a function. + if (SP.isDefinition() && !SP.getContext().isCompileUnit() && + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext())) { + SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); + + // Add arguments. 
+ DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(Args.getElement(i)); + SPCU->addType(Arg, ATy); + if (ATy.isArtificial()) + SPCU->addFlag(Arg, dwarf::DW_AT_artificial); + if (ATy.isObjectPointer()) + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, + dwarf::DW_FORM_ref4, Arg); + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + SPDeclDie); + SPCU->addDie(SPDie); + } + } } SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, @@ -346,7 +388,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, // DW_AT_ranges appropriately. TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() - * Asm->getTargetData().getPointerSize()); + * Asm->getDataLayout().getPointerSize()); for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -386,7 +428,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DISubprogram InlinedSP = getDISubprogram(DS); DIE *OriginDIE = TheCU->getDIE(InlinedSP); if (!OriginDIE) { - DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram."); + DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram."); return NULL; } @@ -395,7 +437,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); if (StartLabel == 0 || EndLabel == 0) { - llvm_unreachable("Unexpected Start and End labels for a inlined scope!"); + llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); } assert(StartLabel->isDefined() 
&& "Invalid starting label for an inlined scope!"); @@ -412,7 +454,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, // DW_AT_ranges appropriately. TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() - * Asm->getTargetData().getPointerSize()); + * Asm->getDataLayout().getPointerSize()); for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -461,21 +503,26 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { return NULL; SmallVector<DIE *, 8> Children; + DIE *ObjectPointer = NULL; // Collect arguments for current function. if (LScopes.isCurrentFunctionScope(Scope)) for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) if (DbgVariable *ArgDV = CurrentFnArguments[i]) if (DIE *Arg = - TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) + TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) { Children.push_back(Arg); + if (ArgDV->isObjectPointer()) ObjectPointer = Arg; + } // Collect lexical scope children first. 
const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) if (DIE *Variable = - TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) + TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) { Children.push_back(Variable); + if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; + } const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren(); for (unsigned j = 0, M = Scopes.size(); j < M; ++j) if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) @@ -509,6 +556,10 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { E = Children.end(); I != E; ++I) ScopeDIE->addChild(*I); + if (DS.isSubprogram() && ObjectPointer != NULL) + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, + dwarf::DW_FORM_ref4, ObjectPointer); + if (DS.isSubprogram()) TheCU->addPubTypes(DISubprogram(DS)); @@ -556,7 +607,8 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { unsigned ID = GetOrCreateSourceID(FN, CompilationDir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this); + CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, + Asm, this); NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); @@ -575,7 +627,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { if (!CompilationDir.empty()) NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) - NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) @@ -755,7 +807,7 @@ void DwarfDebug::endModule() { LexicalScope *Scope = new LexicalScope(NULL, DIDescriptor(SP), NULL, false); DeadFnScopeMap[SP] = Scope; - + 
// Construct subprogram DIE and add variables DIEs. CompileUnit *SPCU = CUMap.lookup(TheCU); assert(SPCU && "Unable to find Compile Unit!"); @@ -802,9 +854,9 @@ void DwarfDebug::endModule() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end")); // End text sections. - for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) { - Asm->OutStreamer.SwitchSection(SectionMap[i]); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i)); + for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) { + Asm->OutStreamer.SwitchSection(SectionMap[I]); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1)); } // Compute DIE offsets and sizes. @@ -816,8 +868,8 @@ void DwarfDebug::endModule() { // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit info into a dwarf accelerator table sections. - if (DwarfAccelTables) { + // Emit info into the dwarf accelerator table sections. + if (useDwarfAccelTables()) { emitAccelNames(); emitAccelObjC(); emitAccelNamespaces(); @@ -825,7 +877,10 @@ void DwarfDebug::endModule() { } // Emit info into a debug pubtypes section. - emitDebugPubTypes(); + // TODO: When we don't need the option anymore we can + // remove all of the code that adds to the table. + if (useDarwinGDBCompat()) + emitDebugPubTypes(); // Emit info into a debug loc section. emitDebugLoc(); @@ -840,7 +895,11 @@ void DwarfDebug::endModule() { emitDebugMacInfo(); // Emit inline info. - emitDebugInlineInfo(); + // TODO: When we don't need the option anymore we + // can remove all of the code that this section + // depends upon. + if (useDarwinGDBCompat()) + emitDebugInlineInfo(); // Emit info into a debug str section. emitDebugStr(); @@ -1014,7 +1073,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, if (AbsVar) AbsVar->setMInsn(MInsn); - // Simple ranges that are fully coalesced. + // Simplify ranges that are fully coalesced. 
if (History.size() <= 1 || (History.size() == 2 && MInsn->isIdenticalTo(History.back()))) { RegVar->setMInsn(MInsn); @@ -1267,7 +1326,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Coalesce identical entries at the end of History. if (History.size() >= 2 && Prev->isIdenticalTo(History[History.size() - 2])) { - DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n" + DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" << "\t" << *Prev << "\t" << *History[History.size() - 2] << "\n"); History.pop_back(); @@ -1283,7 +1342,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) { // Drop DBG_VALUE for empty range. - DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n" + DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" << "\t" << *Prev << "\n"); History.pop_back(); } @@ -1300,9 +1359,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MI->isLabel()) AtBlockEntry = false; - // First known non DBG_VALUE location marks beginning of function - // body. - if (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown()) + // First known non-DBG_VALUE and non-frame setup location marks + // the beginning of the function body. + if (!MI->getFlag(MachineInstr::FrameSetup) && + (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown())) PrologEndLoc = MI->getDebugLoc(); // Check if the instruction clobbers any registers with debug vars. @@ -1382,7 +1442,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MF->getFunction()->getContext()); recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), FnStartDL.getScope(MF->getFunction()->getContext()), - DWARF2_LINE_DEFAULT_IS_STMT ? 
DWARF2_FLAG_IS_STMT : 0); + 0); } } @@ -1439,8 +1499,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) - TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, - dwarf::DW_FORM_flag, 1); + TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), MMI->getFrameMoves())); @@ -1710,7 +1769,7 @@ void DwarfDebug::emitDebugInfo() { Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"), DwarfAbbrevSectionSym); Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getTargetData().getPointerSize()); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); emitDIE(Die); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); @@ -1756,14 +1815,14 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(0); Asm->OutStreamer.AddComment("Op size"); - Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1); Asm->OutStreamer.AddComment("DW_LNE_set_address"); Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->OutStreamer.AddComment("Section end label"); Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd), - Asm->getTargetData().getPointerSize(), + Asm->getDataLayout().getPointerSize(), 0/*AddrSpace*/); // Mark end of matrix. @@ -1992,7 +2051,7 @@ void DwarfDebug::emitDebugLoc() { // Start the dwarf loc section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); - unsigned char Size = Asm->getTargetData().getPointerSize(); + unsigned char Size = Asm->getDataLayout().getPointerSize(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); unsigned index = 1; for (SmallVector<DotDebugLocEntry, 4>::iterator @@ -2089,7 +2148,7 @@ void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. 
Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); - unsigned char Size = Asm->getTargetData().getPointerSize(); + unsigned char Size = Asm->getDataLayout().getPointerSize(); for (SmallVector<const MCSymbol *, 8>::iterator I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); I != E; ++I) { @@ -2147,7 +2206,7 @@ void DwarfDebug::emitDebugInlineInfo() { Asm->OutStreamer.AddComment("Dwarf Version"); Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getTargetData().getPointerSize()); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(), E = InlinedSPNodes.end(); I != E; ++I) { @@ -2178,7 +2237,7 @@ void DwarfDebug::emitDebugInlineInfo() { if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc"); Asm->OutStreamer.EmitSymbolValue(LI->first, - Asm->getTargetData().getPointerSize(),0); + Asm->getDataLayout().getPointerSize(),0); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index d1d6512..61d9a51 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -21,9 +21,9 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/UniqueVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/DebugLoc.h" @@ -96,7 +96,8 @@ typedef struct DotDebugLocEntry { DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr) : Begin(B), End(E), Variable(0), Merged(false), Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; } - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr) + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, + const 
ConstantInt *IPtr) : Begin(B), End(E), Variable(0), Merged(false), Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; } @@ -158,11 +159,19 @@ public: bool isArtificial() const { if (Var.isArtificial()) return true; - if (Var.getTag() == dwarf::DW_TAG_arg_variable - && getType().isArtificial()) + if (getType().isArtificial()) return true; return false; } + + bool isObjectPointer() const { + if (Var.isObjectPointer()) + return true; + if (getType().isObjectPointer()) + return true; + return false; + } + bool variableHasComplexAddress() const { assert(Var.Verify() && "Invalid complex DbgVariable!"); return Var.hasComplexAddress(); @@ -222,7 +231,7 @@ class DwarfDebug { /// SectionMap - Provides a unique id per text section. /// - UniqueVector<const MCSection*> SectionMap; + SetVector<const MCSection*> SectionMap; /// CurrentFnArguments - List of Arguments (DbgValues) for current function. SmallVector<DbgVariable *, 8> CurrentFnArguments; @@ -307,6 +316,9 @@ class DwarfDebug { // table for the same directory as DW_at_comp_dir. StringRef CompilationDir; + // A holder for the DarwinGDBCompat flag so that the compile unit can use it. + bool isDarwinGDBCompat; + bool hasDwarfAccelTables; private: /// assignAbbrevNumber - Define a unique number for the abbreviation. @@ -520,6 +532,11 @@ public: /// getStringPoolEntry - returns an entry into the string pool with the given /// string text. MCSymbol *getStringPoolEntry(StringRef Str); + + /// useDarwinGDBCompat - returns whether or not to limit some of our debug + /// output to the limitations of darwin gdb. 
+ bool useDarwinGDBCompat() { return isDarwinGDBCompat; } + bool useDwarfAccelTables() { return hasDwarfAccelTables; } }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp index 70cc2e5..08fb6b3 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -24,7 +24,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -417,7 +417,7 @@ void DwarfException::EmitExceptionTable() { // that we're omitting that bit. TTypeEncoding = dwarf::DW_EH_PE_omit; // dwarf::DW_EH_PE_absptr - TypeFormatSize = Asm->getTargetData().getPointerSize(); + TypeFormatSize = Asm->getDataLayout().getPointerSize(); } else { // Okay, we have actual filters or typeinfos to emit. As such, we need to // pick a type encoding for them. We're about to emit a list of pointers to diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 75f6056..fe9e493 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -43,26 +43,6 @@ protected: /// MMI - Collected machine module information. MachineModuleInfo *MMI; - /// EmitExceptionTable - Emit landing pads and actions. - /// - /// The general organization of the table is complex, but the basic concepts - /// are easy. First there is a header which describes the location and - /// organization of the three components that follow. - /// 1. The landing pad site information describes the range of code covered - /// by the try. 
In our case it's an accumulation of the ranges covered - /// by the invokes in the try. There is also a reference to the landing - /// pad that handles the exception once processed. Finally an index into - /// the actions table. - /// 2. The action table, in our case, is composed of pairs of type ids - /// and next action offset. Starting with the action index from the - /// landing pad site, each type Id is checked for a match to the current - /// exception. If it matches then the exception and type id are passed - /// on to the landing pad. Otherwise the next action is looked up. This - /// chain is terminated with a next action of zero. If no type id is - /// found the frame is unwound and handling continues. - /// 3. Type id table contains references to all the C++ typeinfo for all - /// catches in the function. This tables is reversed indexed base 1. - /// SharedTypeIds - How many leading type ids two landing pads have in common. static unsigned SharedTypeIds(const LandingPadInfo *L, const LandingPadInfo *R); @@ -119,6 +99,26 @@ protected: const RangeMapType &PadMap, const SmallVectorImpl<const LandingPadInfo *> &LPs, const SmallVectorImpl<unsigned> &FirstActions); + + /// EmitExceptionTable - Emit landing pads and actions. + /// + /// The general organization of the table is complex, but the basic concepts + /// are easy. First there is a header which describes the location and + /// organization of the three components that follow. + /// 1. The landing pad site information describes the range of code covered + /// by the try. In our case it's an accumulation of the ranges covered + /// by the invokes in the try. There is also a reference to the landing + /// pad that handles the exception once processed. Finally an index into + /// the actions table. + /// 2. The action table, in our case, is composed of pairs of type ids + /// and next action offset. 
Starting with the action index from the + /// landing pad site, each type Id is checked for a match to the current + /// exception. If it matches then the exception and type id are passed + /// on to the landing pad. Otherwise the next action is looked up. This + /// chain is terminated with a next action of zero. If no type id is + /// found the frame is unwound and handling continues. + /// 3. Type id table contains references to all the C++ typeinfo for all + /// catches in the function. This tables is reversed indexed base 1. void EmitExceptionTable(); public: diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 1153817..f7c0119 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -20,7 +20,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/SmallString.h" @@ -91,7 +91,7 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) { /// either condition is detected in a function which uses the GC. 
/// void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { - unsigned IntPtrSize = AP.TM.getTargetData()->getPointerSize(); + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(getModule(), AP, "code_end"); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp index b83aa5a..70742a8 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -24,7 +24,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index fb65bb7..6f4c5a2 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -357,9 +357,8 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, if (I1 == MBB1->begin() && I2 != MBB2->begin()) { --I2; while (I2->isDebugValue()) { - if (I2 == MBB2->begin()) { + if (I2 == MBB2->begin()) return TailLen; - } --I2; } ++I2; @@ -482,21 +481,19 @@ bool BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { if (getHash() < o.getHash()) return true; - else if (getHash() > o.getHash()) + if (getHash() > o.getHash()) return false; - else if (getBlock()->getNumber() < o.getBlock()->getNumber()) + if (getBlock()->getNumber() < o.getBlock()->getNumber()) return true; - else if (getBlock()->getNumber() > o.getBlock()->getNumber()) + if (getBlock()->getNumber() > o.getBlock()->getNumber()) return false; - else { - // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing - // an object with 
itself. + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. #ifndef _GLIBCXX_DEBUG - llvm_unreachable("Predecessor appears twice"); + llvm_unreachable("Predecessor appears twice"); #else - return false; + return false; #endif - } } /// CountTerminators - Count the number of terminators in the given @@ -574,7 +571,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && - MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + MF->getFunction()->getFnAttributes(). + hasAttribute(Attributes::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; @@ -1554,8 +1552,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Uses.insert(*AI); } else { - if (Uses.count(Reg)) { - Uses.erase(Reg); + if (Uses.erase(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Uses.erase(*SubRegs); // Use sub-registers to be conservative } diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index 939af3f..dee339a 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -9,7 +9,6 @@ #define DEBUG_TYPE "calcspillweights" -#include "llvm/Function.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -42,8 +41,7 @@ void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Compute Spill Weights **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); LiveIntervals &LIS = 
getAnalysis<LiveIntervals>(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -166,7 +164,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { continue; float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && LIS.isAllocatable(hint)) + if (hweight > bestPhys && mri.isAllocatable(hint)) bestPhys = hweight, hintPhys = hint; } else { if (hweight > bestVirt) diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index 0b747fd..22b9140 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -18,7 +18,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; @@ -50,7 +50,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, if (MinAlign > (int)Align) Align = MinAlign; MF.getFrameInfo()->ensureMaxAlignment(Align); - TM.getTargetLowering()->HandleByVal(this, Size); + TM.getTargetLowering()->HandleByVal(this, Size, Align); unsigned Offset = AllocateStack(Size, Align); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index fb2c2e8..a53f6f8 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -41,6 +41,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineCopyPropagationPass(Registry); initializeMachineCSEPass(Registry); initializeMachineDominatorTreePass(Registry); + initializeMachinePostDominatorTreePass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); @@ -56,6 +57,7 @@ void 
llvm::initializeCodeGen(PassRegistry &Registry) { initializeRegisterCoalescerPass(Registry); initializeSlotIndexesPass(Registry); initializeStackProtectorPass(Registry); + initializeStackColoringPass(Registry); initializeStackSlotColoringPass(Registry); initializeStrongPHIEliminationPass(Registry); initializeTailDuplicatePassPass(Registry); diff --git a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp index 99233df..d8e06c3 100644 --- a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp +++ b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp @@ -373,7 +373,7 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) { /// bool CodePlacementOpt::AlignLoops(MachineFunction &MF) { const Function *F = MF.getFunction(); - if (F->hasFnAttr(Attribute::OptimizeForSize)) + if (F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize)) return false; unsigned Align = TLI->getPrefLoopAlignment(); diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index a9de1c749..377b471 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -527,7 +527,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!RegClassInfo.isAllocatable(AntiDepReg)) + if (!MRI.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. 
AntiDepReg = 0; else if (KeepRegs.test(AntiDepReg)) diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index b4394e8..8964269 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -33,7 +33,6 @@ namespace { const MachineRegisterInfo *MRI; const TargetInstrInfo *TII; BitVector LivePhysRegs; - BitVector ReservedRegs; public: static char ID; // Pass identification, replacement for typeid @@ -70,7 +69,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // Don't delete live physreg defs, or any reserved register defs. - if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg)) + if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) return false; } else { if (!MRI->use_nodbg_empty(Reg)) @@ -90,9 +89,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); - // Treat reserved registers as always live. - ReservedRegs = TRI->getReservedRegs(MF); - // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will // be cleaned up. @@ -101,7 +97,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock *MBB = &*I; // Start out assuming that reserved registers are live out of this block. - LivePhysRegs = ReservedRegs; + LivePhysRegs = MRI->getReservedRegs(); // Also add any explicit live-out physregs for this block. 
if (!MBB->empty() && MBB->back().isReturn()) diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp index f9347ef..d5d8404 100644 --- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -18,7 +18,6 @@ #define DEBUG_TYPE "early-ifcvt" #include "MachineTraceMetrics.h" -#include "llvm/Function.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" @@ -32,9 +31,9 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -775,11 +774,11 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" - << "********** Function: " - << ((Value*)MF.getFunction())->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); - SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel; + SchedModel = + MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel(); MRI = &MF.getRegInfo(); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); @@ -798,6 +797,5 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { if (tryConvertIf(I->getBlock())) Changed = true; - MF.verify(this, "After early if-conversion"); return Changed; } diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index fee8e47..ed78f19 100644 --- 
a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -626,9 +626,12 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { } dv->Instrs.push_back(mi); - // Finally set all defs and non-collapsed uses to dv. - for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); + // Finally set all defs and non-collapsed uses to dv. We must iterate through + // all the operators, including imp-def ones. + for (MachineInstr::mop_iterator ii = mi->operands_begin(), + ee = mi->operands_end(); + ii != ee; ++ii) { + MachineOperand &mo = *ii; if (!mo.isReg()) continue; int rx = regIndex(mo.getReg()); if (rx < 0) continue; @@ -654,7 +657,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { bool anyregs = false; for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) { + if (MF->getRegInfo().isPhysRegUsed(*I)) { anyregs = true; break; } diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 7a17331..ffe4b63 100644 --- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "postrapseudos" #include "llvm/CodeGen/Passes.h" -#include "llvm/Function.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -190,8 +189,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Machine Function\n" << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); diff --git 
a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp index 506b5cf..f4755bb 100644 --- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp +++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp @@ -20,6 +20,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/DominatorInternals.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -387,9 +388,16 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { const TargetFrameLowering *TFI = TM->getFrameLowering(); assert(TFI && "TargetRegisterInfo not available!"); - for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), - RE = FI->roots_end(); RI != RE; ++RI) - RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); + RI != FI->roots_end();) { + // If the root references a dead object, no need to keep it. 
+ if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { + RI = FI->removeStackRoot(RI); + } else { + RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + ++RI; + } + } } bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index 4214ba1..31e36f0 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "ifcvt" #include "BranchFolding.h" -#include "llvm/Function.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -282,7 +281,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" - << MF.getFunction()->getName() << "\'"); + << MF.getName() << "\'"); if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { DEBUG(dbgs() << " skipped\n"); @@ -997,14 +996,13 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, } for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Reg = Defs[i]; - if (Redefs.count(Reg)) { + if (!Redefs.insert(Reg)) { if (AddImpUse) // Treat predicated update as read + write. 
MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, true/*IsImp*/,false/*IsKill*/, false/*IsDead*/,true/*IsUndef*/)); } else { - Redefs.insert(Reg); for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Redefs.insert(*SubRegs); } diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 07e37af..37828a7 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -613,7 +613,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, propagateSiblingValue(SVI); } while (!WorkList.empty()); - // Look up the value we were looking for. We already did this lokup at the + // Look up the value we were looking for. We already did this lookup at the // top of the function, but SibValues may have been invalidated. SVI = SibValues.find(UseVNI); assert(SVI != SibValues.end() && "Didn't compute requested info"); @@ -863,7 +863,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // If the instruction also writes VirtReg.reg, it had better not require the // same register for uses and defs. SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; - MIBundleOperands::RegInfo RI = + MIBundleOperands::VirtRegInfo RI = MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); if (RI.Tied) { markValueUsed(&VirtReg, ParentVNI); @@ -1142,7 +1142,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Analyze instruction. SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; - MIBundleOperands::RegInfo RI = + MIBundleOperands::VirtRegInfo RI = MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. 
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index 8d2282a..6120ae56 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; template <class ArgIt> @@ -457,7 +457,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Strip out annotate intrinsic case Intrinsic::memcpy: { - IntegerType *IntPtr = TD.getIntPtrType(Context); + Type *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -468,7 +468,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memmove: { - IntegerType *IntPtr = TD.getIntPtrType(Context); + Type *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -479,7 +479,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - IntegerType *IntPtr = TD.getIntPtrType(Context); + Type *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index d631726..defc127 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -687,8 +687,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { clear(); LS.initialize(mf); DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " - << ((Value*)mf.getFunction())->getName() - << " **********\n"); + << mf.getName() << " **********\n"); bool Changed = 
collectDebugValues(mf); computeIntervals(); diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index 0a795e6..8585cbb 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "RegisterCoalescer.h" #include <algorithm> using namespace llvm; @@ -58,8 +59,16 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, return VNI; } if (SlotIndex::isSameInstr(Def, I->start)) { - assert(I->start == Def && "Cannot insert def, already live"); - assert(I->valno->def == Def && "Inconsistent existing value def"); + assert(I->valno->def == I->start && "Inconsistent existing value def"); + + // It is possible to have both normal and early-clobber defs of the same + // register on an instruction. It doesn't make a lot of sense, but it is + // possible to specify in inline assembly. + // + // Just convert everything to early-clobber. + Def = std::min(Def, I->start); + if (Def != I->start) + I->start = I->valno->def = Def; return I->valno; } assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); @@ -68,21 +77,6 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, return VNI; } -/// killedInRange - Return true if the interval has kills in [Start,End). -bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const { - Ranges::const_iterator r = - std::lower_bound(ranges.begin(), ranges.end(), End); - - // Now r points to the first interval with start >= End, or ranges.end(). - if (r == ranges.begin()) - return false; - - --r; - // Now r points to the last interval with end <= End. - // r->end is the kill point. - return r->end >= Start && r->end < End; -} - // overlaps - Return true if the intersection of the two live intervals is // not empty. 
// @@ -142,6 +136,48 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, return false; } +bool LiveInterval::overlaps(const LiveInterval &Other, + const CoalescerPair &CP, + const SlotIndexes &Indexes) const { + assert(!empty() && "empty interval"); + if (Other.empty()) + return false; + + // Use binary searches to find initial positions. + const_iterator I = find(Other.beginIndex()); + const_iterator IE = end(); + if (I == IE) + return false; + const_iterator J = Other.find(I->start); + const_iterator JE = Other.end(); + if (J == JE) + return false; + + for (;;) { + // J has just been advanced to satisfy: + assert(J->end >= I->start); + // Check for an overlap. + if (J->start < I->end) { + // I and J are overlapping. Find the later start. + SlotIndex Def = std::max(I->start, J->start); + // Allow the overlap if Def is a coalescable copy. + if (Def.isBlock() || + !CP.isCoalescable(Indexes.getInstructionFromIndex(Def))) + return true; + } + // Advance the iterator that ends first to check for more overlaps. + if (J->end > I->end) { + std::swap(I, J); + std::swap(IE, JE); + } + // Advance J until J->end >= I->start. + do + if (++J == JE) + return false; + while (J->end < I->start); + } +} + /// overlaps - Return true if the live interval overlaps a range specified /// by [Start, End). bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { @@ -399,7 +435,7 @@ void LiveInterval::join(LiveInterval &Other, // If we have to apply a mapping to our base interval assignment, rewrite it // now. - if (MustMapCurValNos) { + if (MustMapCurValNos && !empty()) { // Map the first live range. 
iterator OutIt = begin(); @@ -673,27 +709,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { return V2; } -void LiveInterval::Copy(const LiveInterval &RHS, - MachineRegisterInfo *MRI, - VNInfo::Allocator &VNInfoAllocator) { - ranges.clear(); - valnos.clear(); - std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg); - MRI->setRegAllocationHint(reg, Hint.first, Hint.second); - - weight = RHS.weight; - for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) { - const VNInfo *VNI = RHS.getValNumInfo(i); - createValueCopy(VNI, VNInfoAllocator); - } - for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) { - const LiveRange &LR = RHS.ranges[i]; - addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id))); - } - - verify(); -} - unsigned LiveInterval::getSize() const { unsigned Sum = 0; for (const_iterator I = begin(), E = end(); I != E; ++I) @@ -705,9 +720,11 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LiveRange::dump() const { dbgs() << *this << "\n"; } +#endif void LiveInterval::print(raw_ostream &OS) const { if (empty()) @@ -740,9 +757,11 @@ void LiveInterval::print(raw_ostream &OS) const { } } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LiveInterval::dump() const { dbgs() << *this << "\n"; } +#endif #ifndef NDEBUG void LiveInterval::verify() const { diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index d0f8ae1..4e75d89 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -34,6 +34,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "LiveRangeCalc.h" +#include "VirtRegMap.h" #include <algorithm> #include <limits> #include <cmath> @@ -109,8 +110,6 @@ bool 
LiveIntervals::runOnMachineFunction(MachineFunction &fn) { DomTree = &getAnalysis<MachineDominatorTree>(); if (!LRCalc) LRCalc = new LiveRangeCalc(); - AllocatableRegs = TRI->getAllocatableSet(fn); - ReservedRegs = TRI->getReservedRegs(fn); // Allocate space for all virtual registers. VirtRegIntervals.resize(MRI->getNumVirtRegs()); @@ -147,6 +146,11 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n'; } + OS << "RegMasks:"; + for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i) + OS << ' ' << RegMaskSlots[i]; + OS << '\n'; + printInstrs(OS); } @@ -155,9 +159,11 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const { MF->print(OS, Indexes); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void LiveIntervals::dumpInstrs() const { printInstrs(dbgs()); } +#endif static bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { @@ -382,8 +388,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, /// which a variable is live void LiveIntervals::computeIntervals() { DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" - << "********** Function: " - << ((Value*)MF->getFunction())->getName() << '\n'); + << "********** Function: " << MF->getName() << '\n'); RegMaskBlocks.resize(MF->getNumBlockIDs()); @@ -440,7 +445,7 @@ void LiveIntervals::computeIntervals() { // Compute the number of register mask instructions in this block. std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; - RMB.second = RegMaskSlots.size() - RMB.first;; + RMB.second = RegMaskSlots.size() - RMB.first; } // Create empty intervals for registers defined by implicit_def's (except @@ -497,7 +502,7 @@ void LiveIntervals::computeRegMasks() { RegMaskBits.push_back(MO->getRegMask()); } // Compute the number of register mask instructions in this block. 
- RMB.second = RegMaskSlots.size() - RMB.first;; + RMB.second = RegMaskSlots.size() - RMB.first; } } @@ -540,11 +545,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { // Ignore uses of reserved registers. We only track defs of those. for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { unsigned Root = *Roots; - if (!isReserved(Root) && !MRI->reg_empty(Root)) + if (!MRI->isReserved(Root) && !MRI->reg_empty(Root)) LRCalc->extendToUses(LI, Root); for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { unsigned Reg = *Supers; - if (!isReserved(Reg) && !MRI->reg_empty(Reg)) + if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg)) LRCalc->extendToUses(LI, Reg); } } @@ -729,17 +734,100 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, return CanSeparate; } +void LiveIntervals::extendToIndices(LiveInterval *LI, + ArrayRef<SlotIndex> Indices) { + assert(LRCalc && "LRCalc not initialized."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + for (unsigned i = 0, e = Indices.size(); i != e; ++i) + LRCalc->extend(LI, Indices[i]); +} + +void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, + SmallVectorImpl<SlotIndex> *EndPoints) { + LiveRangeQuery LRQ(*LI, Kill); + VNInfo *VNI = LRQ.valueOut(); + if (!VNI) + return; + + MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill); + SlotIndex MBBStart, MBBEnd; + tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB); + + // If VNI isn't live out from KillMBB, the value is trivially pruned. + if (LRQ.endPoint() < MBBEnd) { + LI->removeRange(Kill, LRQ.endPoint()); + if (EndPoints) EndPoints->push_back(LRQ.endPoint()); + return; + } + + // VNI is live out of KillMBB. + LI->removeRange(Kill, MBBEnd); + if (EndPoints) EndPoints->push_back(MBBEnd); + + // Find all blocks that are reachable from KillMBB without leaving VNI's live + // range. It is possible that KillMBB itself is reachable, so start a DFS + // from each successor. 
+ typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy; + VisitedTy Visited; + for (MachineBasicBlock::succ_iterator + SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end(); + SuccI != SuccE; ++SuccI) { + for (df_ext_iterator<MachineBasicBlock*, VisitedTy> + I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited); + I != E;) { + MachineBasicBlock *MBB = *I; + + // Check if VNI is live in to MBB. + tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); + LiveRangeQuery LRQ(*LI, MBBStart); + if (LRQ.valueIn() != VNI) { + // This block isn't part of the VNI live range. Prune the search. + I.skipChildren(); + continue; + } + + // Prune the search if VNI is killed in MBB. + if (LRQ.endPoint() < MBBEnd) { + LI->removeRange(MBBStart, LRQ.endPoint()); + if (EndPoints) EndPoints->push_back(LRQ.endPoint()); + I.skipChildren(); + continue; + } + + // VNI is live through MBB. + LI->removeRange(MBBStart, MBBEnd); + if (EndPoints) EndPoints->push_back(MBBEnd); + ++I; + } + } +} //===----------------------------------------------------------------------===// // Register allocator hooks. // -void LiveIntervals::addKillFlags() { +void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { + // Keep track of regunit ranges. + SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; LiveInterval *LI = &getInterval(Reg); + if (LI->empty()) + continue; + + // Find the regunit intervals for the assigned register. They may overlap + // the virtual register live range, cancelling any kills. 
+ RU.clear(); + for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid(); + ++Units) { + LiveInterval *RUInt = &getRegUnit(*Units); + if (RUInt->empty()) + continue; + RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end))); + } // Every instruction that kills Reg corresponds to a live range end point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; @@ -750,7 +838,32 @@ void LiveIntervals::addKillFlags() { MachineInstr *MI = getInstructionFromIndex(RI->end); if (!MI) continue; - MI->addRegisterKilled(Reg, NULL); + + // Check if any of the reguints are live beyond the end of RI. That could + // happen when a physreg is defined as a copy of a virtreg: + // + // %EAX = COPY %vreg5 + // FOO %vreg5 <--- MI, cancel kill because %EAX is live. + // BAR %EAX<kill> + // + // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. + bool CancelKill = false; + for (unsigned u = 0, e = RU.size(); u != e; ++u) { + LiveInterval *RInt = RU[u].first; + LiveInterval::iterator &I = RU[u].second; + if (I == RInt->end()) + continue; + I = RInt->advanceTo(I, RI->end); + if (I == RInt->end() || I->start >= RI->end) + continue; + // I is overlapping RI. 
+ CancelKill = true; + break; + } + if (CancelKill) + MI->clearRegisterKills(Reg, NULL); + else + MI->addRegisterKilled(Reg, NULL); } } } @@ -900,497 +1013,321 @@ private: LiveIntervals& LIS; const MachineRegisterInfo& MRI; const TargetRegisterInfo& TRI; + SlotIndex OldIdx; SlotIndex NewIdx; - - typedef std::pair<LiveInterval*, LiveRange*> IntRangePair; - typedef DenseSet<IntRangePair> RangeSet; - - struct RegRanges { - LiveRange* Use; - LiveRange* EC; - LiveRange* Dead; - LiveRange* Def; - RegRanges() : Use(0), EC(0), Dead(0), Def(0) {} - }; - typedef DenseMap<unsigned, RegRanges> BundleRanges; + SmallPtrSet<LiveInterval*, 8> Updated; + bool UpdateFlags; public: HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI, - const TargetRegisterInfo& TRI, SlotIndex NewIdx) - : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {} - - // Update intervals for all operands of MI from OldIdx to NewIdx. - // This assumes that MI used to be at OldIdx, and now resides at - // NewIdx. - void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) { - assert(NewIdx != OldIdx && "No-op move? That's a bit strange."); - - // Collect the operands. - RangeSet Entering, Internal, Exiting; - bool hasRegMaskOp = false; - collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); - - // To keep the LiveRanges valid within an interval, move the ranges closest - // to the destination first. This prevents ranges from overlapping, to that - // APIs like removeRange still work. 
- if (NewIdx < OldIdx) { - moveAllEnteringFrom(OldIdx, Entering); - moveAllInternalFrom(OldIdx, Internal); - moveAllExitingFrom(OldIdx, Exiting); - } - else { - moveAllExitingFrom(OldIdx, Exiting); - moveAllInternalFrom(OldIdx, Internal); - moveAllEnteringFrom(OldIdx, Entering); - } - - if (hasRegMaskOp) - updateRegMaskSlots(OldIdx); - -#ifndef NDEBUG - LIValidator validator; - validator = std::for_each(Entering.begin(), Entering.end(), validator); - validator = std::for_each(Internal.begin(), Internal.end(), validator); - validator = std::for_each(Exiting.begin(), Exiting.end(), validator); - assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness."); -#endif - + const TargetRegisterInfo& TRI, + SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags) + : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx), + UpdateFlags(UpdateFlags) {} + + // FIXME: UpdateFlags is a workaround that creates live intervals for all + // physregs, even those that aren't needed for regalloc, in order to update + // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill + // flags, and postRA passes will use a live register utility instead. + LiveInterval *getRegUnitLI(unsigned Unit) { + if (UpdateFlags) + return &LIS.getRegUnit(Unit); + return LIS.getCachedRegUnit(Unit); } - // Update intervals for all operands of MI to refer to BundleStart's - // SlotIndex. - void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) { - if (MI == BundleStart) - return; // Bundling instr with itself - nothing to do. - - SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI); - assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI && - "SlotIndex <-> Instruction mapping broken for MI"); - - // Collect all ranges already in the bundle. 
- MachineBasicBlock::instr_iterator BII(BundleStart); - RangeSet Entering, Internal, Exiting; - bool hasRegMaskOp = false; - collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); - assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); - for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) { - if (&*BII == MI) + /// Update all live ranges touched by MI, assuming a move from OldIdx to + /// NewIdx. + void updateAllRanges(MachineInstr *MI) { + DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI); + bool hasRegMask = false; + for (MIOperands MO(MI); MO.isValid(); ++MO) { + if (MO->isRegMask()) + hasRegMask = true; + if (!MO->isReg()) continue; - collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); - assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); - } - - BundleRanges BR = createBundleRanges(Entering, Internal, Exiting); - - Entering.clear(); - Internal.clear(); - Exiting.clear(); - collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); - assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); + // Aggressively clear all kill flags. + // They are reinserted by VirtRegRewriter. 
+ if (MO->isUse()) + MO->setIsKill(false); - DEBUG(dbgs() << "Entering: " << Entering.size() << "\n"); - DEBUG(dbgs() << "Internal: " << Internal.size() << "\n"); - DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n"); - - moveAllEnteringFromInto(OldIdx, Entering, BR); - moveAllInternalFromInto(OldIdx, Internal, BR); - moveAllExitingFromInto(OldIdx, Exiting, BR); - - -#ifndef NDEBUG - LIValidator validator; - validator = std::for_each(Entering.begin(), Entering.end(), validator); - validator = std::for_each(Internal.begin(), Internal.end(), validator); - validator = std::for_each(Exiting.begin(), Exiting.end(), validator); - assert(validator.rangesOk() && "moveAllOperandsInto broke liveness."); -#endif - } - -private: - -#ifndef NDEBUG - class LIValidator { - private: - DenseSet<const LiveInterval*> Checked, Bogus; - public: - void operator()(const IntRangePair& P) { - const LiveInterval* LI = P.first; - if (Checked.count(LI)) - return; - Checked.insert(LI); - if (LI->empty()) - return; - SlotIndex LastEnd = LI->begin()->start; - for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end(); - LRI != LRE; ++LRI) { - const LiveRange& LR = *LRI; - if (LastEnd > LR.start || LR.start >= LR.end) - Bogus.insert(LI); - LastEnd = LR.end; - } - } - - bool rangesOk() const { - return Bogus.empty(); - } - }; -#endif - - // Collect IntRangePairs for all operands of MI that may need fixing. - // Treat's MI's index as OldIdx (regardless of what it is in SlotIndexes' - // maps). 
- void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal, - RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) { - hasRegMaskOp = false; - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - const MachineOperand& MO = *MOI; - - if (MO.isRegMask()) { - hasRegMaskOp = true; + unsigned Reg = MO->getReg(); + if (!Reg) continue; - } - - if (!MO.isReg() || MO.getReg() == 0) - continue; - - unsigned Reg = MO.getReg(); - - // TODO: Currently we're skipping uses that are reserved or have no - // interval, but we're not updating their kills. This should be - // fixed. - if (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)) + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + updateRange(LIS.getInterval(Reg)); continue; - - // Collect ranges for register units. These live ranges are computed on - // demand, so just skip any that haven't been computed yet. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) - if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) - collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx); - } else { - // Collect ranges for individual virtual registers. - collectRanges(MO, &LIS.getInterval(Reg), - Entering, Internal, Exiting, OldIdx); } + + // For physregs, only update the regunits that actually have a + // precomputed live range. 
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + if (LiveInterval *LI = getRegUnitLI(*Units)) + updateRange(*LI); } + if (hasRegMask) + updateRegMaskSlots(); } - void collectRanges(const MachineOperand &MO, LiveInterval *LI, - RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting, - SlotIndex OldIdx) { - if (MO.readsReg()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx); - if (LR != 0) - Entering.insert(std::make_pair(LI, LR)); - } - if (MO.isDef()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); - assert(LR != 0 && "No live range for def?"); - if (LR->end > OldIdx.getDeadSlot()) - Exiting.insert(std::make_pair(LI, LR)); +private: + /// Update a single live range, assuming an instruction has been moved from + /// OldIdx to NewIdx. + void updateRange(LiveInterval &LI) { + if (!Updated.insert(&LI)) + return; + DEBUG({ + dbgs() << " "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + dbgs() << PrintReg(LI.reg); else - Internal.insert(std::make_pair(LI, LR)); - } + dbgs() << PrintRegUnit(LI.reg, &TRI); + dbgs() << ":\t" << LI << '\n'; + }); + if (SlotIndex::isEarlierInstr(OldIdx, NewIdx)) + handleMoveDown(LI); + else + handleMoveUp(LI); + DEBUG(dbgs() << " -->\t" << LI << '\n'); + LI.verify(); } - BundleRanges createBundleRanges(RangeSet& Entering, - RangeSet& Internal, - RangeSet& Exiting) { - BundleRanges BR; - - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) { - LiveInterval* LI = EI->first; - LiveRange* LR = EI->second; - BR[LI->reg].Use = LR; - } + /// Update LI to reflect an instruction has been moved downwards from OldIdx + /// to NewIdx. + /// + /// 1. Live def at OldIdx: + /// Move def to NewIdx, assert endpoint after NewIdx. + /// + /// 2. Live def at OldIdx, killed at NewIdx: + /// Change to dead def at NewIdx. + /// (Happens when bundling def+kill together). + /// + /// 3. Dead def at OldIdx: + /// Move def to NewIdx, possibly across another live value. 
+ /// + /// 4. Def at OldIdx AND at NewIdx: + /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx. + /// (Happens when bundling multiple defs together). + /// + /// 5. Value read at OldIdx, killed before NewIdx: + /// Extend kill to NewIdx. + /// + void handleMoveDown(LiveInterval &LI) { + // First look for a kill at OldIdx. + LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); + LiveInterval::iterator E = LI.end(); + // Is LI even live at OldIdx? + if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + return; - for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); - II != IE; ++II) { - LiveInterval* LI = II->first; - LiveRange* LR = II->second; - if (LR->end.isDead()) { - BR[LI->reg].Dead = LR; - } else { - BR[LI->reg].EC = LR; - } + // Handle a live-in value. + if (!SlotIndex::isSameInstr(I->start, OldIdx)) { + bool isKill = SlotIndex::isSameInstr(OldIdx, I->end); + // If the live-in value already extends to NewIdx, there is nothing to do. + if (!SlotIndex::isEarlierInstr(I->end, NewIdx)) + return; + // Aggressively remove all kill flags from the old kill point. + // Kill flags shouldn't be used while live intervals exist, they will be + // reinserted by VirtRegRewriter. + if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end)) + for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO) + if (MO->isReg() && MO->isUse()) + MO->setIsKill(false); + // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by + // overlapping ranges. Case 5 above. + I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); + // If this was a kill, there may also be a def. Otherwise we're done. + if (!isKill) + return; + ++I; } - for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); - EI != EE; ++EI) { - LiveInterval* LI = EI->first; - LiveRange* LR = EI->second; - BR[LI->reg].Def = LR; + // Check for a def at OldIdx. + if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start)) + return; + // We have a def at OldIdx. 
+ VNInfo *DefVNI = I->valno; + assert(DefVNI->def == I->start && "Inconsistent def"); + DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); + // If the defined value extends beyond NewIdx, just move the def down. + // This is case 1 above. + if (SlotIndex::isEarlierInstr(NewIdx, I->end)) { + I->start = DefVNI->def; + return; } - - return BR; - } - - void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) { - MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx); - if (!OldKillMI->killsRegister(reg)) - return; // Bail out if we don't have kill flags on the old register. - MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx); - assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill."); - assert(!NewKillMI->killsRegister(reg) && - "New kill instr is already a kill."); - OldKillMI->clearRegisterKills(reg, &TRI); - NewKillMI->addRegisterKilled(reg, &TRI); - } - - void updateRegMaskSlots(SlotIndex OldIdx) { - SmallVectorImpl<SlotIndex>::iterator RI = - std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), - OldIdx); - assert(*RI == OldIdx && "No RegMask at OldIdx."); - *RI = NewIdx; - assert(*prior(RI) < *RI && *RI < *next(RI) && - "RegSlots out of order. Did you move one call across another?"); - } - - // Return the last use of reg between NewIdx and OldIdx. - SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) { - SlotIndex LastUse = NewIdx; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI.use_nodbg_begin(Reg), - UE = MRI.use_nodbg_end(); - UI != UE; UI.skipInstruction()) { - const MachineInstr* MI = &*UI; - SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); - if (InstSlot > LastUse && InstSlot < OldIdx) - LastUse = InstSlot; + // The remaining possibilities are now: + // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx). + // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot(). 
+ // In either case, it is possible that there is an existing def at NewIdx. + assert((I->end == OldIdx.getDeadSlot() || + SlotIndex::isSameInstr(I->end, NewIdx)) && + "Cannot move def below kill"); + LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot()); + if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) { + // There is an existing def at NewIdx, case 4 above. The def at OldIdx is + // coalesced into that value. + assert(NewI->valno != DefVNI && "Multiple defs of value?"); + LI.removeValNo(DefVNI); + return; } - return LastUse; + // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx. + // If the def at OldIdx was dead, we allow it to be moved across other LI + // values. The new range should be placed immediately before NewI, move any + // intermediate ranges up. + assert(NewI != I && "Inconsistent iterators"); + std::copy(llvm::next(I), NewI, I); + *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } - void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - bool LiveThrough = LR->end > OldIdx.getRegSlot(); - if (LiveThrough) + /// Update LI to reflect an instruction has been moved upwards from OldIdx + /// to NewIdx. + /// + /// 1. Live def at OldIdx: + /// Hoist def to NewIdx. + /// + /// 2. Dead def at OldIdx: + /// Hoist def+end to NewIdx, possibly move across other values. + /// + /// 3. Dead def at OldIdx AND existing def at NewIdx: + /// Remove value defined at OldIdx, coalescing it with existing value. + /// + /// 4. Live def at OldIdx AND existing def at NewIdx: + /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx. + /// (Happens when bundling multiple defs together). + /// + /// 5. Value killed at OldIdx: + /// Hoist kill to NewIdx, then scan for last kill between NewIdx and + /// OldIdx. + /// + void handleMoveUp(LiveInterval &LI) { + // First look for a kill at OldIdx. 
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); + LiveInterval::iterator E = LI.end(); + // Is LI even live at OldIdx? + if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) return; - SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); - if (LastUse != NewIdx) - moveKillFlags(LI->reg, NewIdx, LastUse); - LR->end = LastUse.getRegSlot(); - } - void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - // Extend the LiveRange if NewIdx is past the end. - if (NewIdx > LR->end) { - // Move kill flags if OldIdx was not originally the end - // (otherwise LR->end points to an invalid slot). - if (LR->end.getRegSlot() != OldIdx.getRegSlot()) { - assert(LR->end > OldIdx && "LiveRange does not cover original slot"); - moveKillFlags(LI->reg, LR->end, NewIdx); + // Handle a live-in value. + if (!SlotIndex::isSameInstr(I->start, OldIdx)) { + // If the live-in value isn't killed here, there is nothing to do. + if (!SlotIndex::isSameInstr(OldIdx, I->end)) + return; + // Adjust I->end to end at NewIdx. If we are hoisting a kill above + // another use, we need to search for that use. Case 5 above. + I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); + ++I; + // If OldIdx also defines a value, there couldn't have been another use. + if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { + // No def, search for the new kill. + // This can never be an early clobber kill since there is no def. 
+ llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot(); + return; } - LR->end = NewIdx.getRegSlot(); - } - } - - void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) { - bool GoingUp = NewIdx < OldIdx; - - if (GoingUp) { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringUpFrom(OldIdx, *EI); - } else { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringDownFrom(OldIdx, *EI); } - } - - void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && - LR->end <= OldIdx.getDeadSlot() && - "Range should be internal to OldIdx."); - LiveRange Tmp(*LR); - Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber()); - Tmp.valno->def = Tmp.start; - Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot(); - LI->removeRange(*LR); - LI->addRange(Tmp); - } - - void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) { - for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); - II != IE; ++II) - moveInternalFrom(OldIdx, *II); - } - - void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) { - LiveRange* LR = P.second; - assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && - "Range should start in OldIdx."); - assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx."); - SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber()); - LR->start = NewStart; - LR->valno->def = NewStart; - } - - void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) { - for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); - EI != EE; ++EI) - moveExitingFrom(OldIdx, *EI); - } - void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - bool LiveThrough = LR->end > OldIdx.getRegSlot(); - if (LiveThrough) 
{ - assert((LR->start < NewIdx || BR[LI->reg].Def == LR) && - "Def in bundle should be def range."); - assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && - "If bundle has use for this reg it should be LR."); - BR[LI->reg].Use = LR; + // Now deal with the def at OldIdx. + assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?"); + VNInfo *DefVNI = I->valno; + assert(DefVNI->def == I->start && "Inconsistent def"); + DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); + + // Check for an existing def at NewIdx. + LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot()); + if (SlotIndex::isSameInstr(NewI->start, NewIdx)) { + assert(NewI->valno != DefVNI && "Same value defined more than once?"); + // There is an existing def at NewIdx. + if (I->end.isDead()) { + // Case 3: Remove the dead def at OldIdx. + LI.removeValNo(DefVNI); + return; + } + // Case 4: Replace def at NewIdx with live def at OldIdx. + I->start = DefVNI->def; + LI.removeValNo(NewI->valno); return; } - SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); - moveKillFlags(LI->reg, OldIdx, LastUse); - - if (LR->start < NewIdx) { - // Becoming a new entering range. - assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 && - "Bundle shouldn't be re-defining reg mid-range."); - assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && - "Bundle shouldn't have different use range for same reg."); - LR->end = LastUse.getRegSlot(); - BR[LI->reg].Use = LR; - } else { - // Becoming a new Dead-def. 
- assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) && - "Live range starting at unexpected slot."); - assert(BR[LI->reg].Def == LR && "Reg should have def range."); - assert(BR[LI->reg].Dead == 0 && - "Can't have def and dead def of same reg in a bundle."); - LR->end = LastUse.getDeadSlot(); - BR[LI->reg].Dead = BR[LI->reg].Def; - BR[LI->reg].Def = 0; - } - } - - void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - if (NewIdx > LR->end) { - // Range extended to bundle. Add to bundle uses. - // Note: Currently adds kill flags to bundle start. - assert(BR[LI->reg].Use == 0 && - "Bundle already has use range for reg."); - moveKillFlags(LI->reg, LR->end, NewIdx); - LR->end = NewIdx.getRegSlot(); - BR[LI->reg].Use = LR; - } else { - assert(BR[LI->reg].Use != 0 && - "Bundle should already have a use range for reg."); - } - } - - void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering, - BundleRanges& BR) { - bool GoingUp = NewIdx < OldIdx; - - if (GoingUp) { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringUpFromInto(OldIdx, *EI, BR); - } else { - for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); - EI != EE; ++EI) - moveEnteringDownFromInto(OldIdx, *EI, BR); + // There is no existing def at NewIdx. Hoist DefVNI. + if (!I->end.isDead()) { + // Leave the end point of a live def. + I->start = DefVNI->def; + return; } - } - void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - // TODO: Sane rules for moving ranges into bundles. + // DefVNI is a dead def. It may have been moved across other values in LI, + // so move I up to NewI. Slide [NewI;I) down one position. 
+ std::copy_backward(NewI, I, llvm::next(I)); + *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } - void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal, - BundleRanges& BR) { - for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); - II != IE; ++II) - moveInternalFromInto(OldIdx, *II, BR); + void updateRegMaskSlots() { + SmallVectorImpl<SlotIndex>::iterator RI = + std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), + OldIdx); + assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() && + "No RegMask at OldIdx."); + *RI = NewIdx.getRegSlot(); + assert((RI == LIS.RegMaskSlots.begin() || + SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) && + "Cannot move regmask instruction above another call"); + assert((llvm::next(RI) == LIS.RegMaskSlots.end() || + SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) && + "Cannot move regmask instruction below another call"); } - void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P, - BundleRanges& BR) { - LiveInterval* LI = P.first; - LiveRange* LR = P.second; - - assert(LR->start.isRegister() && - "Don't know how to merge exiting ECs into bundles yet."); + // Return the last use of reg between NewIdx and OldIdx. + SlotIndex findLastUseBefore(unsigned Reg) { + SlotIndex LastUse = NewIdx; - if (LR->end > NewIdx.getDeadSlot()) { - // This range is becoming an exiting range on the bundle. - // If there was an old dead-def of this reg, delete it. 
- if (BR[LI->reg].Dead != 0) { - LI->removeRange(*BR[LI->reg].Dead); - BR[LI->reg].Dead = 0; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI.use_nodbg_begin(Reg), + UE = MRI.use_nodbg_end(); + UI != UE; UI.skipInstruction()) { + const MachineInstr* MI = &*UI; + SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); + if (InstSlot > LastUse && InstSlot < OldIdx) + LastUse = InstSlot; } - assert(BR[LI->reg].Def == 0 && - "Can't have two defs for the same variable exiting a bundle."); - LR->start = NewIdx.getRegSlot(); - LR->valno->def = LR->start; - BR[LI->reg].Def = LR; } else { - // This range is becoming internal to the bundle. - assert(LR->end == NewIdx.getRegSlot() && - "Can't bundle def whose kill is before the bundle"); - if (BR[LI->reg].Dead || BR[LI->reg].Def) { - // Already have a def for this. Just delete range. - LI->removeRange(*LR); - } else { - // Make range dead, record. - LR->end = NewIdx.getDeadSlot(); - BR[LI->reg].Dead = LR; - assert(BR[LI->reg].Use == LR && - "Range becoming dead should currently be use."); + MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx); + MachineBasicBlock::iterator MII(MI); + ++MII; + MachineBasicBlock* MBB = MI->getParent(); + for (; MII != MBB->end() && LIS.getInstructionIndex(MII) < OldIdx; ++MII){ + for (MachineInstr::mop_iterator MOI = MII->operands_begin(), + MOE = MII->operands_end(); + MOI != MOE; ++MOI) { + const MachineOperand& mop = *MOI; + if (!mop.isReg() || mop.getReg() == 0 || + TargetRegisterInfo::isVirtualRegister(mop.getReg())) + continue; + + if (TRI.hasRegUnit(mop.getReg(), Reg)) + LastUse = LIS.getInstructionIndex(MII); + } } - // In both cases the range is no longer a use on the bundle. 
- BR[LI->reg].Use = 0; } + return LastUse; } - - void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting, - BundleRanges& BR) { - for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); - EI != EE; ++EI) - moveExitingFromInto(OldIdx, *EI, BR); - } - }; -void LiveIntervals::handleMove(MachineInstr* MI) { +void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) { + assert(!MI->isBundled() && "Can't handle bundled instructions yet."); SlotIndex OldIndex = Indexes->getInstructionIndex(MI); Indexes->removeMachineInstrFromMaps(MI); - SlotIndex NewIndex = MI->isInsideBundle() ? - Indexes->getInstructionIndex(MI) : - Indexes->insertMachineInstrInMaps(MI); + SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI); assert(getMBBStartIdx(MI->getParent()) <= OldIndex && OldIndex < getMBBEndIdx(MI->getParent()) && "Cannot handle moves across basic block boundaries."); - assert(!MI->isBundled() && "Can't handle bundled instructions yet."); - HMEditor HME(*this, *MRI, *TRI, NewIndex); - HME.moveAllRangesFrom(MI, OldIndex); + HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); + HME.updateAllRanges(MI); } void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, - MachineInstr* BundleStart) { + MachineInstr* BundleStart, + bool UpdateFlags) { + SlotIndex OldIndex = Indexes->getInstructionIndex(MI); SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart); - HMEditor HME(*this, *MRI, *TRI, NewIndex); - HME.moveAllRangesInto(MI, BundleStart); + HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); + HME.updateAllRanges(MI); } diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h index cd4e690..4d41fca 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h @@ -178,8 +178,8 @@ public: bool checkLoopInterference(MachineLoopRange*); private: - Query(const Query&); // DO NOT IMPLEMENT - void operator=(const Query&); // 
DO NOT IMPLEMENT + Query(const Query&) LLVM_DELETED_FUNCTION; + void operator=(const Query&) LLVM_DELETED_FUNCTION; }; // Array of LiveIntervalUnions. diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp index d828f25..c3ff4f1 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -65,7 +65,11 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { // Visit all operands that read Reg. This may include partial defs. for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), E = MRI->reg_nodbg_end(); I != E; ++I) { - const MachineOperand &MO = I.getOperand(); + MachineOperand &MO = I.getOperand(); + // Clear all kill flags. They will be reinserted after register allocation + // by LiveIntervalAnalysis::addKillFlags(). + if (MO.isUse()) + MO.setIsKill(false); if (!MO.readsReg()) continue; // MI is reading Reg. We may have visited MI before if it happens to be diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index b4ce9aa..f8fbc7d 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -87,7 +87,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, // We can't remat physreg uses, unless it is a constant. if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction())) + if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent())) continue; return false; } @@ -96,6 +96,13 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); if (!OVNI) continue; + + // Don't allow rematerialization immediately after the original def. + // It would be incorrect if OrigMI redefines the register. + // See PR14098. 
+ if (SlotIndex::isSameInstr(OrigIdx, UseIdx)) + return false; + if (OVNI != li.getVNInfoAt(UseIdx)) return false; } @@ -249,7 +256,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, unsigned Reg = MOI->getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) { // Check if MI reads any unreserved physregs. - if (Reg && MOI->readsReg() && !LIS.isReserved(Reg)) + if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) ReadsPhysRegs = true; continue; } diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp index cdb1776..7f22478 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "regalloc" #include "LiveRegMatrix.h" +#include "RegisterCoalescer.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -117,8 +118,9 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg) { if (VirtReg.empty()) return false; + CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) - if (VirtReg.overlaps(LIS->getRegUnit(*Units))) + if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes())) return true; return false; } diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.h b/contrib/llvm/lib/CodeGen/LiveRegMatrix.h index b3e2d7f..8f22c24 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.h +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.h @@ -15,7 +15,7 @@ // Register units are defined in MCRegisterInfo.h, they represent the smallest // unit of interference when dealing with overlapping physical registers. The // LiveRegMatrix is represented as a LiveIntervalUnion per register unit. 
When -// a virtual register is assigned to a physicval register, the live range for +// a virtual register is assigned to a physical register, the live range for // the virtual register is inserted into the LiveIntervalUnion for each regunit // in the physreg. // diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp index 939e795..f0b522b 100644 --- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp @@ -25,7 +25,10 @@ using namespace llvm; char LiveStacks::ID = 0; -INITIALIZE_PASS(LiveStacks, "livestacks", +INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks", + "Live Stack Slot Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(LiveStacks, "livestacks", "Live Stack Slot Analysis", false, false) char &llvm::LiveStacksID = LiveStacks::ID; diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index 348ed3a..6ea933d 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -65,6 +65,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { } void LiveVariables::VarInfo::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << " Alive in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), E = AliveBlocks.end(); I != E; ++I) @@ -77,6 +78,7 @@ void LiveVariables::VarInfo::dump() const { dbgs() << "\n #" << i << ": " << *Kills[i]; dbgs() << "\n"; } +#endif } /// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg. 
@@ -501,8 +503,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MRI = &mf.getRegInfo(); TRI = MF->getTarget().getRegisterInfo(); - ReservedRegisters = TRI->getReservedRegs(mf); - unsigned NumRegs = TRI->getNumRegs(); PhysRegDef = new MachineInstr*[NumRegs]; PhysRegUse = new MachineInstr*[NumRegs]; @@ -586,7 +586,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { unsigned MOReg = UseRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegUse(MOReg, MBB, MI); - else if (!ReservedRegisters[MOReg]) + else if (!MRI->isReserved(MOReg)) HandlePhysRegUse(MOReg, MI); } @@ -599,7 +599,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { unsigned MOReg = DefRegs[i]; if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); - else if (!ReservedRegisters[MOReg]) + else if (!MRI->isReserved(MOReg)) HandlePhysRegDef(MOReg, MI, Defs); } UpdatePhysRegDefs(MI, Defs); @@ -806,18 +806,44 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *SuccBB) { const unsigned NumNew = BB->getNumber(); - // All registers used by PHI nodes in SuccBB must be live through BB. - for (MachineBasicBlock::iterator BBI = SuccBB->begin(), - BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) + SmallSet<unsigned, 16> Defs, Kills; + + MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end(); + for (; BBI != BBE && BBI->isPHI(); ++BBI) { + // Record the def of the PHI node. + Defs.insert(BBI->getOperand(0).getReg()); + + // All registers used by PHI nodes in SuccBB must be live through BB. for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) if (BBI->getOperand(i+1).getMBB() == BB) getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew); + } + + // Record all vreg defs and kills of all instructions in SuccBB. 
+ for (; BBI != BBE; ++BBI) { + for (MachineInstr::mop_iterator I = BBI->operands_begin(), + E = BBI->operands_end(); I != E; ++I) { + if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) { + if (I->isDef()) + Defs.insert(I->getReg()); + else if (I->isKill()) + Kills.insert(I->getReg()); + } + } + } // Update info for all live variables for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + + // If the Defs is defined in the successor it can't be live in BB. + if (Defs.count(Reg)) + continue; + + // If the register is either killed in or live through SuccBB it's also live + // through BB. VarInfo &VI = getVarInfo(Reg); - if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI)) + if (Kills.count(Reg) || VI.AliveBlocks.test(SuccBB->getNumber())) VI.AliveBlocks.set(NumNew); } } diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index fa6b450..18d021d 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -21,7 +21,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Assembly/Writer.h" @@ -145,7 +145,8 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { instr_iterator I = instr_begin(), E = instr_end(); while (I != E && I->isPHI()) ++I; - assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!"); + assert((I == E || !I->isInsideBundle()) && + "First non-phi MI cannot be inside a bundle!"); return I; } @@ -156,7 +157,7 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { ++I; // FIXME: This needs to change if we wish to bundle labels / dbg_values // inside the bundle. 
- assert(!I->isInsideBundle() && + assert((I == E || !I->isInsideBundle()) && "First non-phi / non-label instruction is inside a bundle!"); return I; } @@ -228,9 +229,11 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { return 0; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineBasicBlock::dump() const { print(dbgs()); } +#endif StringRef MachineBasicBlock::getName() const { if (const BasicBlock *LBB = getBasicBlock()) @@ -243,7 +246,7 @@ StringRef MachineBasicBlock::getName() const { std::string MachineBasicBlock::getFullName() const { std::string Name; if (getParent()) - Name = (getParent()->getFunction()->getName() + ":").str(); + Name = (getParent()->getName() + ":").str(); if (getBasicBlock()) Name += getBasicBlock()->getName(); else @@ -942,12 +945,11 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { /// getSuccWeight - Return weight of the edge from this block to MBB. /// -uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const { +uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const { if (Weights.empty()) return 0; - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); - return *getWeightIterator(I); + return *getWeightIterator(Succ); } /// getWeightIterator - Return wight iterator corresonding to the I successor @@ -970,6 +972,80 @@ getWeightIterator(MachineBasicBlock::const_succ_iterator I) const { return Weights.begin() + index; } +/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed +/// as of just before "MI". +/// +/// Search is localised to a neighborhood of +/// Neighborhood instructions before (searching for defs or kills) and N +/// instructions after (searching just for defs) MI. 
+MachineBasicBlock::LivenessQueryResult +MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, + unsigned Reg, MachineInstr *MI, + unsigned Neighborhood) { + + unsigned N = Neighborhood; + MachineBasicBlock *MBB = MI->getParent(); + + // Start by searching backwards from MI, looking for kills, reads or defs. + + MachineBasicBlock::iterator I(MI); + // If this is the first insn in the block, don't search backwards. + if (I != MBB->begin()) { + do { + --I; + + MachineOperandIteratorBase::PhysRegInfo Analysis = + MIOperands(I).analyzePhysReg(Reg, TRI); + + if (Analysis.Kills) + // Register killed, so isn't live. + return LQR_Dead; + + else if (Analysis.DefinesOverlap || Analysis.ReadsOverlap) + // Defined or read without a previous kill - live. + return (Analysis.Defines || Analysis.Reads) ? + LQR_Live : LQR_OverlappingLive; + + } while (I != MBB->begin() && --N > 0); + } + + // Did we get to the start of the block? + if (I == MBB->begin()) { + // If so, the register's state is definitely defined by the live-in state. + for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); + RAI.isValid(); ++RAI) { + if (MBB->isLiveIn(*RAI)) + return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive; + } + + return LQR_Dead; + } + + N = Neighborhood; + + // Try searching forwards from MI, looking for reads or defs. + I = MachineBasicBlock::iterator(MI); + // If this is the last insn in the block, don't search forwards. + if (I != MBB->end()) { + for (++I; I != MBB->end() && N > 0; ++I, --N) { + MachineOperandIteratorBase::PhysRegInfo Analysis = + MIOperands(I).analyzePhysReg(Reg, TRI); + + if (Analysis.ReadsOverlap) + // Used, therefore must have been live. + return (Analysis.Reads) ? + LQR_Live : LQR_OverlappingLive; + + else if (Analysis.DefinesOverlap) + // Defined (but not read) therefore cannot have been live. + return LQR_Dead; + } + } + + // At this point we have no idea of the liveness of the register. 
+ return LQR_Unknown; +} + void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, bool t) { OS << "BB#" << MBB->getNumber(); diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index c4dca2c..cd3f199 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -500,11 +500,10 @@ void MachineBlockPlacement::buildChain( assert(BB); assert(BlockToChain[BB] == &Chain); assert(*llvm::prior(Chain.end()) == BB); - MachineBasicBlock *BestSucc = 0; // Look for the best viable successor if there is one to place immediately // after this block. - BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at @@ -1014,7 +1013,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + if (F.getFunction()->getFnAttributes(). 
+ hasAttribute(Attributes::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 0cc1af0..4479211 100644 --- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -38,7 +38,7 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Scale = 1; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight; } @@ -53,22 +53,30 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Sum = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight / Scale; } assert(Sum <= UINT32_MAX); return Sum; } -uint32_t -MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + MachineBasicBlock::const_succ_iterator Dst) const { uint32_t Weight = Src->getSuccWeight(Dst); if (!Weight) return DEFAULT_WEIGHT; return Weight; } +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + // This is a linear search. Try to use the const_succ_iterator version when + // possible. 
+ return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); +} + bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% @@ -82,7 +90,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { MachineBasicBlock *MaxSucc = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); if (Weight > MaxWeight) { MaxWeight = Weight; MaxSucc = *I; diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 896461f..dbc41de 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -63,8 +63,6 @@ namespace { virtual void releaseMemory() { ScopeMap.clear(); Exps.clear(); - AllocatableRegs.clear(); - ReservedRegs.clear(); } private: @@ -78,8 +76,6 @@ namespace { ScopedHTType VNT; SmallVector<MachineInstr*, 64> Exps; unsigned CurrVN; - BitVector AllocatableRegs; - BitVector ReservedRegs; bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, @@ -88,7 +84,8 @@ namespace { bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs) const; + SmallVector<unsigned,2> &PhysDefs, + bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet<unsigned,8> &PhysRefs, SmallVector<unsigned,2> &PhysDefs, @@ -198,31 +195,52 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet<unsigned,8> &PhysRefs, - SmallVector<unsigned,2> &PhysDefs) const{ - MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); + SmallVector<unsigned,2> &PhysDefs, + bool &PhysUseDef) const{ + // 
First, add all uses to PhysRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) + if (!MO.isReg() || MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; - // If the def is dead, it's ok. But the def may not marked "dead". That's - // common since this pass is run before livevariables. We can scan - // forward a few instructions and check if it is obviously dead. - if (MO.isDef() && - (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) - continue; // Reading constant physregs is ok. if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) PhysRefs.insert(*AI); - if (MO.isDef()) + } + + // Next, collect all defs into PhysDefs. If any is already in PhysRefs + // (which currently contains only uses), set the PhysUseDef flag. + PhysUseDef = false; + MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // Check against PhysRefs even if the def is "dead". + if (PhysRefs.count(Reg)) + PhysUseDef = true; + // If the def is dead, it's ok. But the def may not marked "dead". That's + // common since this pass is run before livevariables. We can scan + // forward a few instructions and check if it is obviously dead. + if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end())) PhysDefs.push_back(Reg); } + // Finally, add all defs to PhysRefs as well. 
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) + for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); + return !PhysRefs.empty(); } @@ -242,7 +260,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, return false; for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { - if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i])) + if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i])) // Avoid extending live range of physical registers if they are //allocatable or reserved. return false; @@ -411,8 +429,8 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); assert(SI != ScopeMap.end()); - ScopeMap.erase(SI); delete SI->second; + ScopeMap.erase(SI); } bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { @@ -463,16 +481,22 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool CrossMBBPhysDef = false; SmallSet<unsigned, 8> PhysRefs; SmallVector<unsigned, 2> PhysDefs; - if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { + bool PhysUseDef = false; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, + PhysDefs, PhysUseDef)) { FoundCSE = false; // ... Unless the CS is local or is in the sole predecessor block // and it also defines the physical register which is not clobbered // in between and the physical register uses were not clobbered. - unsigned CSVN = VNT.lookup(MI); - MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) - FoundCSE = true; + // This can never be the case if the instruction both uses and + // defines the same physical register, which was detected above. 
+ if (!PhysUseDef) { + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) + FoundCSE = true; + } } if (!FoundCSE) { @@ -635,7 +659,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<MachineDominatorTree>(); - AllocatableRegs = TRI->getAllocatableSet(MF); - ReservedRegs = TRI->getReservedRegs(MF); return PerformCSE(DT->getRootNode()); } diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index bac3aa2..4a79328 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -16,6 +16,7 @@ #include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -32,7 +33,7 @@ STATISTIC(NumDeletes, "Number of dead copies deleted"); namespace { class MachineCopyPropagation : public MachineFunctionPass { const TargetRegisterInfo *TRI; - BitVector ReservedRegs; + MachineRegisterInfo *MRI; public: static char ID; // Pass identification, replacement for typeid @@ -146,8 +147,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src); if (CI != AvailCopyMap.end()) { MachineInstr *CopyMI = CI->second; - if (!ReservedRegs.test(Def) && - (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) && + if (!MRI->isReserved(Def) && + (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) && isNopCopy(CopyMI, Def, Src, TRI)) { // The two copies cancel out and the source of the first copy // hasn't been overridden, eliminate the second one. e.g. 
@@ -259,7 +260,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); DI != DE; ++DI) { unsigned Reg = (*DI)->getOperand(0).getReg(); - if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg)) + if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg)) continue; (*DI)->eraseFromParent(); Changed = true; @@ -296,7 +297,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { for (SmallSetVector<MachineInstr*, 8>::iterator DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); DI != DE; ++DI) { - if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) { + if (!MRI->isReserved((*DI)->getOperand(0).getReg())) { (*DI)->eraseFromParent(); Changed = true; ++NumDeletes; @@ -311,7 +312,7 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; TRI = MF.getTarget().getRegisterInfo(); - ReservedRegs = TRI->getReservedRegs(MF); + MRI = &MF.getRegInfo(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) Changed |= CopyPropagateBlock(*I); diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index d4aede8a..91d5211 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -28,7 +28,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetFrameLowering.h" @@ -59,13 +59,13 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, RegInfo = 0; MFInfo = 0; FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering()); - if (Fn->hasFnAttr(Attribute::StackAlignment)) - FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs( - 
Fn->getAttributes().getFnAttributes())); - ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); + if (Fn->getFnAttributes().hasAttribute(Attributes::StackAlignment)) + FrameInfo->ensureMaxAlignment(Fn->getAttributes(). + getFnAttributes().getStackAlignment()); + ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout()); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. - if (!Fn->hasFnAttr(Attribute::OptimizeForSize)) + if (!Fn->getFnAttributes().hasAttribute(Attributes::OptimizeForSize)) Alignment = std::max(Alignment, TM.getTargetLowering()->getPrefFunctionAlignment()); FunctionNumber = FunctionNum; @@ -284,12 +284,19 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, return std::make_pair(Result, Result + Num); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineFunction::dump() const { print(dbgs()); } +#endif + +StringRef MachineFunction::getName() const { + assert(getFunction() && "No function!"); + return getFunction()->getName(); +} void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { - OS << "# Machine code for function " << Fn->getName() << ": "; + OS << "# Machine code for function " << getName() << ": "; if (RegInfo) { OS << (RegInfo->isSSA() ? 
"SSA" : "Post SSA"); if (!RegInfo->tracksLiveness()) @@ -334,7 +341,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { BB->print(OS, Indexes); } - OS << "\n# End machine code for function " << Fn->getName() << ".\n\n"; + OS << "\n# End machine code for function " << getName() << ".\n\n"; } namespace llvm { @@ -344,7 +351,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getFunction()->getName().str() + "' function"; + return "CFG for '" + F->getName().str() + "' function"; } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -377,7 +384,7 @@ namespace llvm { void MachineFunction::viewCFG() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName()); + ViewGraph(this, "mf" + getName()); #else errs() << "MachineFunction::viewCFG is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -387,7 +394,7 @@ void MachineFunction::viewCFG() const void MachineFunction::viewCFGOnly() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName(), true); + ViewGraph(this, "mf" + getName(), true); #else errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -453,7 +460,9 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, unsigned StackAlign = TFI.getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/false, false)); + /*isSS*/ false, + /*NeedSP*/ false, + /*Alloca*/ 0)); return -++NumFixedObjects; } @@ -525,16 +534,18 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ } } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineFrameInfo::dump(const MachineFunction &MF) const { print(MF, dbgs()); } +#endif 
//===----------------------------------------------------------------------===// // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// /// getEntrySize - Return the size of each entry in the jump table. -unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { +unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const { // The size of a jump table entry is 4 bytes unless the entry is just the // address of a block, in which case it is the pointer size. switch (getEntryKind()) { @@ -553,7 +564,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { } /// getEntryAlignment - Return the alignment of each entry in the jump table. -unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { +unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const { // The alignment of a jump table entry is the alignment of int32 unless the // entry is just the address of a block, in which case it is the pointer // alignment. @@ -622,7 +633,9 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { OS << '\n'; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineJumpTableInfo::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// @@ -657,7 +670,7 @@ MachineConstantPool::~MachineConstantPool() { /// CanShareConstantPoolEntry - Test whether the given two constants /// can be allocated the same constant pool entry. static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, - const TargetData *TD) { + const DataLayout *TD) { // Handle the trivial case quickly. if (A == B) return true; @@ -681,7 +694,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // Try constant folding a bitcast of both instructions to an integer. If we // get two identical ConstantInt's, then we are good to share them. 
We use // the constant folding APIs to do this so that we get the benefit of - // TargetData. + // DataLayout. if (isa<PointerType>(A->getType())) A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, const_cast<Constant*>(A), TD); @@ -749,10 +762,12 @@ void MachineConstantPool::print(raw_ostream &OS) const { if (Constants[i].isMachineConstantPoolEntry()) Constants[i].Val.MachineCPVal->print(OS); else - OS << *(Value*)Constants[i].Val.ConstVal; + OS << *(const Value*)Constants[i].Val.ConstVal; OS << ", align=" << Constants[i].getAlignment(); OS << "\n"; } } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineConstantPool::dump() const { print(dbgs()); } +#endif diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp index 0102ac7..ed94efb 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -51,7 +51,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { char MachineFunctionPrinterPass::ID = 0; } -char &MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; +char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs", "Machine Function Printer", false, false) diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index b166849..ce8d520 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -111,6 +111,7 @@ void MachineOperand::setIsDef(bool Val) { /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. 
void MachineOperand::ChangeToImmediate(int64_t ImmVal) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); // If this operand is currently a register operand, and if this is in a // function, deregister the operand from the register's use/def list. if (isReg() && isOnRegUseList()) @@ -136,7 +137,8 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, RegInfo = &MF->getRegInfo(); // If this operand is already a register operand, remove it from the // register's use/def lists. - if (RegInfo && isReg()) + bool WasReg = isReg(); + if (RegInfo && WasReg) RegInfo->removeRegOperandFromUseList(this); // Change this to a register and set the reg#. @@ -153,6 +155,9 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsDebug = isDebug; // Ensure isOnRegUseList() returns false. Contents.Reg.Prev = 0; + // Preserve the tie when the operand was already a register. + if (!WasReg) + TiedTo = 0; // If this operand is embedded in a function, add the operand to the // register's use/def list. @@ -193,7 +198,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return !strcmp(getSymbolName(), Other.getSymbolName()) && getOffset() == Other.getOffset(); case MachineOperand::MO_BlockAddress: - return getBlockAddress() == Other.getBlockAddress(); + return getBlockAddress() == Other.getBlockAddress() && + getOffset() == Other.getOffset(); case MO_RegisterMask: return getRegMask() == Other.getRegMask(); case MachineOperand::MO_MCSymbol: @@ -208,8 +214,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { hash_code llvm::hash_value(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_Register: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(), - MO.getSubReg(), MO.isDef()); + // Register operands don't have target flags. 
+ return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef()); case MachineOperand::MO_Immediate: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); case MachineOperand::MO_CImmediate: @@ -234,7 +240,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) { MO.getOffset()); case MachineOperand::MO_BlockAddress: return hash_combine(MO.getType(), MO.getTargetFlags(), - MO.getBlockAddress()); + MO.getBlockAddress(), MO.getOffset()); case MachineOperand::MO_RegisterMask: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); case MachineOperand::MO_Metadata: @@ -262,7 +268,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << PrintReg(getReg(), TRI, getSubReg()); if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || - isInternalRead() || isEarlyClobber()) { + isInternalRead() || isEarlyClobber() || isTied()) { OS << '<'; bool NeedComma = false; if (isDef()) { @@ -282,27 +288,32 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { NeedComma = true; } - if (isKill() || isDead() || (isUndef() && isUse()) || isInternalRead()) { + if (isKill()) { if (NeedComma) OS << ','; - NeedComma = false; - if (isKill()) { - OS << "kill"; - NeedComma = true; - } - if (isDead()) { - OS << "dead"; - NeedComma = true; - } - if (isUndef() && isUse()) { - if (NeedComma) OS << ','; - OS << "undef"; - NeedComma = true; - } - if (isInternalRead()) { - if (NeedComma) OS << ','; - OS << "internal"; - NeedComma = true; - } + OS << "kill"; + NeedComma = true; + } + if (isDead()) { + if (NeedComma) OS << ','; + OS << "dead"; + NeedComma = true; + } + if (isUndef() && isUse()) { + if (NeedComma) OS << ','; + OS << "undef"; + NeedComma = true; + } + if (isInternalRead()) { + if (NeedComma) OS << ','; + OS << "internal"; + NeedComma = true; + } + if (isTied()) { + if (NeedComma) OS << ','; + OS << "tied"; + if (TiedTo != 15) + OS << unsigned(TiedTo - 1); + NeedComma = true; } 
OS << '>'; } @@ -352,6 +363,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { case MachineOperand::MO_BlockAddress: OS << '<'; WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false); + if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; case MachineOperand::MO_RegisterMask: @@ -528,20 +540,6 @@ void MachineInstr::addImplicitDefUseOperands() { /// MachineInstr ctor - This constructor creates a MachineInstr and adds the /// implicit operands. It reserves space for the number of operands specified by /// the MCInstrDesc. -MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) - : MCID(&tid), Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(0), Parent(0) { - unsigned NumImplicitOps = 0; - if (!NoImp) - NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + MCID->getNumOperands()); - if (!NoImp) - addImplicitDefUseOperands(); - // Make sure that we get added to a machine basicblock - LeakDetector::addGarbageObject(this); -} - -/// MachineInstr ctor - As above, but with a DebugLoc. MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), @@ -559,21 +557,6 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, /// MachineInstr ctor - Work exactly the same as the ctor two above, except /// that the MachineInstr is created and added to the end of the specified /// basic block. 
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) - : MCID(&tid), Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(0), Parent(0) { - assert(MBB && "Cannot use inserting ctor with null basic block!"); - unsigned NumImplicitOps = - MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + MCID->getNumOperands()); - addImplicitDefUseOperands(); - // Make sure that we get added to a machine basicblock - LeakDetector::addGarbageObject(this); - MBB->push_back(this); // Add instruction to end of basic block! -} - -/// MachineInstr ctor - As above, but with a DebugLoc. -/// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), @@ -673,6 +656,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (!isImpReg && !isInlineAsm()) { while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { --OpNo; + assert(!Operands[OpNo].isTied() && "Cannot move tied operands"); if (RegInfo) RegInfo->removeRegOperandFromUseList(&Operands[OpNo]); } @@ -708,12 +692,25 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Operands[OpNo].isReg()) { // Ensure isOnRegUseList() returns false, regardless of Op's status. Operands[OpNo].Contents.Reg.Prev = 0; + // Ignore existing ties. This is not a property that can be copied. + Operands[OpNo].TiedTo = 0; // Add the new operand to RegInfo. if (RegInfo) RegInfo->addRegOperandToUseList(&Operands[OpNo]); - // If the register operand is flagged as early, mark the operand as such. - if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) - Operands[OpNo].setIsEarlyClobber(true); + // The MCID operand information isn't accurate until we start adding + // explicit operands. The implicit operands are added first, then the + // explicits are inserted before them. + if (!isImpReg) { + // Tie uses to defs as indicated in MCInstrDesc. 
+ if (Operands[OpNo].isUse()) { + int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO); + if (DefIdx != -1) + tieOperands(DefIdx, OpNo); + } + // If the register operand is flagged as early, mark the operand as such. + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } } // Re-add all the implicit ops. @@ -730,6 +727,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// void MachineInstr::RemoveOperand(unsigned OpNo) { assert(OpNo < Operands.size() && "Invalid operand number"); + untieRegOperand(OpNo); MachineRegisterInfo *RegInfo = getRegInfo(); // Special case removing the last one. @@ -752,6 +750,13 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { } } +#ifndef NDEBUG + // Moving tied operands would break the ties. + for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) + if (Operands[i].isReg()) + assert(!Operands[i].isTied() && "Cannot move tied operands"); +#endif + Operands.erase(Operands.begin()+OpNo); if (RegInfo) { @@ -935,6 +940,12 @@ bool MachineInstr::isStackAligningInlineAsm() const { return false; } +InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const { + assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!"); + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0); +} + int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx, unsigned *GroupNo) const { assert(isInlineAsm() && "Expected an inline asm instruction"); @@ -1004,9 +1015,10 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, unsigned MachineInstr::getBundleSize() const { assert(isBundle() && "Expecting a bundle"); - MachineBasicBlock::const_instr_iterator I = *this; + const MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::const_instr_iterator I = *this, E = MBB->instr_end(); unsigned Size = 0; - while ((++I)->isInsideBundle()) { + while ((++I != E) && 
I->isInsideBundle()) { ++Size; } assert(Size > 1 && "Malformed bundle"); @@ -1114,107 +1126,99 @@ int MachineInstr::findFirstPredOperandIdx() const { return -1; } -/// isRegTiedToUseOperand - Given the index of a register def operand, -/// check if the register def is tied to a source operand, due to either -/// two-address elimination or inline assembly constraints. Returns the -/// first tied use operand index by reference is UseOpIdx is not null. -bool MachineInstr:: -isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { - if (isInlineAsm()) { - assert(DefOpIdx > InlineAsm::MIOp_FirstOperand); - const MachineOperand &MO = getOperand(DefOpIdx); - if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) - return false; - // Determine the actual operand index that corresponds to this index. - unsigned DefNo = 0; - int FlagIdx = findInlineAsmFlagIdx(DefOpIdx, &DefNo); - if (FlagIdx < 0) - return false; +// MachineOperand::TiedTo is 4 bits wide. +const unsigned TiedMax = 15; - // Which part of the group is DefOpIdx? - unsigned DefPart = DefOpIdx - (FlagIdx + 1); - - for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); - i != e; ++i) { - const MachineOperand &FMO = getOperand(i); - if (!FMO.isImm()) - continue; - if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse()) - continue; - unsigned Idx; - if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) && - Idx == DefNo) { - if (UseOpIdx) - *UseOpIdx = (unsigned)i + 1 + DefPart; - return true; - } - } - return false; +/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other. +/// +/// Use and def operands can be tied together, indicated by a non-zero TiedTo +/// field. TiedTo can have these values: +/// +/// 0: Operand is not tied to anything. +/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1). +/// TiedMax: Tied to an operand >= TiedMax-1. +/// +/// The tied def must be one of the first TiedMax operands on a normal +/// instruction. 
INLINEASM instructions allow more tied defs. +/// +void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) { + MachineOperand &DefMO = getOperand(DefIdx); + MachineOperand &UseMO = getOperand(UseIdx); + assert(DefMO.isDef() && "DefIdx must be a def operand"); + assert(UseMO.isUse() && "UseIdx must be a use operand"); + assert(!DefMO.isTied() && "Def is already tied to another use"); + assert(!UseMO.isTied() && "Use is already tied to another def"); + + if (DefIdx < TiedMax) + UseMO.TiedTo = DefIdx + 1; + else { + // Inline asm can use the group descriptors to find tied operands, but on + // normal instruction, the tied def must be within the first TiedMax + // operands. + assert(isInlineAsm() && "DefIdx out of range"); + UseMO.TiedTo = TiedMax; } - assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!"); - const MCInstrDesc &MCID = getDesc(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); - if (MO.isReg() && MO.isUse() && - MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) { - if (UseOpIdx) - *UseOpIdx = (unsigned)i; - return true; - } - } - return false; + // UseIdx can be out of range, we'll search for it in findTiedOperandIdx(). + DefMO.TiedTo = std::min(UseIdx + 1, TiedMax); } -/// isRegTiedToDefOperand - Return true if the operand of the specified index -/// is a register use and it is tied to an def operand. It also returns the def -/// operand index by reference. -bool MachineInstr:: -isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { - if (isInlineAsm()) { - const MachineOperand &MO = getOperand(UseOpIdx); - if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0) - return false; +/// Given the index of a tied register operand, find the operand it is tied to. +/// Defs are tied to uses and vice versa. Returns the index of the tied operand +/// which must exist. 
+unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { + const MachineOperand &MO = getOperand(OpIdx); + assert(MO.isTied() && "Operand isn't tied"); - // Find the flag operand corresponding to UseOpIdx - int FlagIdx = findInlineAsmFlagIdx(UseOpIdx); - if (FlagIdx < 0) - return false; + // Normally TiedTo is in range. + if (MO.TiedTo < TiedMax) + return MO.TiedTo - 1; - const MachineOperand &UFMO = getOperand(FlagIdx); - unsigned DefNo; - if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) { - if (!DefOpIdx) - return true; - - unsigned DefIdx = InlineAsm::MIOp_FirstOperand; - // Remember to adjust the index. First operand is asm string, second is - // the HasSideEffects and AlignStack bits, then there is a flag for each. - while (DefNo) { - const MachineOperand &FMO = getOperand(DefIdx); - assert(FMO.isImm()); - // Skip over this def. - DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1; - --DefNo; - } - *DefOpIdx = DefIdx + UseOpIdx - FlagIdx; - return true; + // Uses on normal instructions can be out of range. + if (!isInlineAsm()) { + // Normal tied defs must be in the 0..TiedMax-1 range. + if (MO.isUse()) + return TiedMax - 1; + // MO is a def. Search for the tied use. + for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) { + const MachineOperand &UseMO = getOperand(i); + if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1) + return i; } - return false; + llvm_unreachable("Can't find tied use"); } - const MCInstrDesc &MCID = getDesc(); - if (UseOpIdx >= MCID.getNumOperands()) - return false; - const MachineOperand &MO = getOperand(UseOpIdx); - if (!MO.isReg() || !MO.isUse()) - return false; - int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO); - if (DefIdx == -1) - return false; - if (DefOpIdx) - *DefOpIdx = (unsigned)DefIdx; - return true; + // Now deal with inline asm by parsing the operand group descriptor flags. + // Find the beginning of each operand group. 
+ SmallVector<unsigned, 8> GroupIdx; + unsigned OpIdxGroup = ~0u; + unsigned NumOps; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e; + i += NumOps) { + const MachineOperand &FlagMO = getOperand(i); + assert(FlagMO.isImm() && "Invalid tied operand on inline asm"); + unsigned CurGroup = GroupIdx.size(); + GroupIdx.push_back(i); + NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm()); + // OpIdx belongs to this operand group. + if (OpIdx > i && OpIdx < i + NumOps) + OpIdxGroup = CurGroup; + unsigned TiedGroup; + if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup)) + continue; + // Operands in this group are tied to operands in TiedGroup which must be + // earlier. Find the number of operands between the two groups. + unsigned Delta = i - GroupIdx[TiedGroup]; + + // OpIdx is a use tied to TiedGroup. + if (OpIdxGroup == CurGroup) + return OpIdx - Delta; + + // OpIdx is a def tied to this use group. + if (OpIdxGroup == TiedGroup) + return OpIdx + Delta; + } + llvm_unreachable("Invalid tied operand on inline asm"); } /// clearKillInfo - Clears kill flags on all operands. @@ -1292,7 +1296,12 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. - if (mayStore() || isCall()) { + // + // Treat volatile loads as stores. This is not strictly necessary for + // volatiles, but it is required for atomic loads. It is not allowed to move + // a load across an atomic load with Ordering > Monotonic. + if (mayStore() || isCall() || + (mayLoad() && hasOrderedMemoryRef())) { SawStore = true; return false; } @@ -1308,8 +1317,8 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // load. if (mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and - // end of block, or if the load is volatile, we can't move it. 
- return !SawStore && !hasVolatileMemoryRef(); + // end of block, we can't move it. + return !SawStore; return true; } @@ -1340,11 +1349,11 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, return true; } -/// hasVolatileMemoryRef - Return true if this instruction may have a -/// volatile memory reference, or if the information describing the -/// memory reference is not available. Return false if it is known to -/// have no volatile memory references. -bool MachineInstr::hasVolatileMemoryRef() const { +/// hasOrderedMemoryRef - Return true if this instruction may have an ordered +/// or volatile memory reference, or if the information describing the memory +/// reference is not available. Return false if it is known to have no ordered +/// memory references. +bool MachineInstr::hasOrderedMemoryRef() const { // An instruction known never to access memory won't have a volatile access. if (!mayStore() && !mayLoad() && @@ -1357,9 +1366,9 @@ bool MachineInstr::hasVolatileMemoryRef() const { if (memoperands_empty()) return true; - // Check the memory reference information for volatile references. + // Check the memory reference information for ordered references. 
for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I) - if ((*I)->isVolatile()) + if (!(*I)->isUnordered()) return true; return false; @@ -1461,7 +1470,9 @@ void MachineInstr::copyImplicitOps(const MachineInstr *MI) { } void MachineInstr::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << " " << *this; +#endif } static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, @@ -1540,6 +1551,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << " [sideeffect]"; if (ExtraInfo & InlineAsm::Extra_IsAlignStack) OS << " [alignstack]"; + if (getInlineAsmDialect() == InlineAsm::AD_ATT) + OS << " [attdialect]"; + if (getInlineAsmDialect() == InlineAsm::AD_Intel) + OS << " [inteldialect]"; StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand; FirstOp = false; diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp index b7de7bf..1f7fbfc 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -109,10 +109,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE)); - SmallVector<unsigned, 8> LocalDefs; - SmallSet<unsigned, 8> LocalDefSet; + SmallVector<unsigned, 32> LocalDefs; + SmallSet<unsigned, 32> LocalDefSet; SmallSet<unsigned, 8> DeadDefSet; - SmallSet<unsigned, 8> KilledDefSet; + SmallSet<unsigned, 16> KilledDefSet; SmallVector<unsigned, 8> ExternUses; SmallSet<unsigned, 8> ExternUseSet; SmallSet<unsigned, 8> KilledUseSet; @@ -181,7 +181,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, Defs.clear(); } - SmallSet<unsigned, 8> Added; + SmallSet<unsigned, 32> Added; for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Reg = LocalDefs[i]; if (Added.insert(Reg)) { @@ -248,10 +248,10 @@ bool llvm::finalizeBundles(MachineFunction &MF) { // MachineOperand 
iterator //===----------------------------------------------------------------------===// -MachineOperandIteratorBase::RegInfo +MachineOperandIteratorBase::VirtRegInfo MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) { - RegInfo RI = { false, false, false }; + VirtRegInfo RI = { false, false, false }; for(; isValid(); ++*this) { MachineOperand &MO = deref(); if (!MO.isReg() || MO.getReg() != Reg) @@ -276,3 +276,53 @@ MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, } return RI; } + +MachineOperandIteratorBase::PhysRegInfo +MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, + const TargetRegisterInfo *TRI) { + bool AllDefsDead = true; + PhysRegInfo PRI = {false, false, false, false, false, false, false}; + + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "analyzePhysReg not given a physical register!"); + for (; isValid(); ++*this) { + MachineOperand &MO = deref(); + + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) + PRI.Clobbers = true; // Regmask clobbers Reg. + + if (!MO.isReg()) + continue; + + unsigned MOReg = MO.getReg(); + if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) + continue; + + bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg); + bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg); + + if (IsRegOrSuperReg && MO.readsReg()) { + // Reg or a super-reg is read, and perhaps killed also. + PRI.Reads = true; + PRI.Kills = MO.isKill(); + } if (IsRegOrOverlapping && MO.readsReg()) { + PRI.ReadsOverlap = true;// Reg or an overlapping register is read. + } + + if (!MO.isDef()) + continue; + + if (IsRegOrSuperReg) { + PRI.Defines = true; // Reg or a super-register is defined. + if (!MO.isDead()) + AllDefsDead = false; + } + if (IsRegOrOverlapping) + PRI.Clobbers = true; // Reg or an overlapping reg is defined. 
+ } + + if (AllDefsDead && PRI.Defines) + PRI.DefinesDead = true; // Reg or super-register was defined and was dead. + + return PRI; +} diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index efec481..169443e 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -334,7 +334,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); else DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); - DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); + DEBUG(dbgs() << MF.getName() << " ********\n"); if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp index 9f3829e..27afeec 100644 --- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -74,6 +74,8 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { return BotMBB; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineLoop::dump() const { print(dbgs()); } +#endif diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index ea98b23..005bf78 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -25,7 +25,7 @@ using namespace llvm; using namespace llvm::dwarf; -// Handle the Pass registration stuff necessary to use TargetData's. +// Handle the Pass registration stuff necessary to use DataLayout's. 
INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", "Machine Module Information", false, false) char MachineModuleInfo::ID = 0; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp index 5ab56c0..a1c7e9f 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -21,8 +21,8 @@ using namespace llvm; //===----------------------------------------------------------------------===// // Out of line virtual method. -void MachineModuleInfoMachO::Anchor() {} -void MachineModuleInfoELF::Anchor() {} +void MachineModuleInfoMachO::anchor() {} +void MachineModuleInfoELF::anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy; diff --git a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp new file mode 100644 index 0000000..c3f6e92 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp @@ -0,0 +1,55 @@ +//===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements simple dominator construction algorithms for finding +// post dominators on machine functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachinePostDominators.h" + +using namespace llvm; + +char MachinePostDominatorTree::ID = 0; + +//declare initializeMachinePostDominatorTreePass +INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree", + "MachinePostDominator Tree Construction", true, true) + +MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) { + initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry()); + DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate + // postdominator +} + +FunctionPass * +MachinePostDominatorTree::createMachinePostDominatorTreePass() { + return new MachinePostDominatorTree(); +} + +bool +MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { + DT->recalculate(F); + return false; +} + +MachinePostDominatorTree::~MachinePostDominatorTree() { + delete DT; +} + +void +MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void +MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const { + DT->print(OS); +} diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 5fb938f..95d7a7d 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -21,7 +21,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) : TRI(&TRI), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedPhysRegs.resize(TRI.getNumRegs()); + UsedRegUnits.resize(TRI.getNumRegUnits()); UsedPhysRegMask.resize(TRI.getNumRegs()); // Create the physreg use/def lists. 
@@ -32,7 +32,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) MachineRegisterInfo::~MachineRegisterInfo() { #ifndef NDEBUG clearVirtRegs(); - for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i) + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) assert(!PhysRegUseDefLists[i] && "PhysRegUseDefLists has entries after all instructions are deleted"); #endif @@ -306,22 +306,18 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const { void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { ReservedRegs = TRI->getReservedRegs(MF); + assert(ReservedRegs.size() == TRI->getNumRegs() && + "Invalid ReservedRegs vector from target"); } bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, const MachineFunction &MF) const { assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); - // Check if any overlapping register is modified. + // Check if any overlapping register is modified, or allocatable so it may be + // used later. for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) - if (!def_empty(*AI)) - return false; - - // Check if any overlapping register is allocatable so it may be used later. 
- if (AllocatableRegs.empty()) - AllocatableRegs = TRI->getAllocatableSet(MF); - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) - if (AllocatableRegs.test(*AI)) + if (!def_empty(*AI) || isAllocatable(*AI)) return false; return true; } diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index a1dc948..a4817d0 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -18,11 +18,8 @@ #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/RegisterPressure.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleDAGILP.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -35,10 +32,12 @@ using namespace llvm; -static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, - cl::desc("Force top-down list scheduling")); -static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, - cl::desc("Force bottom-up list scheduling")); +namespace llvm { +cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, + cl::desc("Force top-down list scheduling")); +cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, + cl::desc("Force bottom-up list scheduling")); +} #ifndef NDEBUG static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, @@ -50,6 +49,15 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG +// Threshold to very roughly model an out-of-order processor's instruction +// buffers. If the actual value of this threshold matters much in practice, then +// it can be specified by the machine model. 
For now, it's an experimental +// tuning knob to determine when and if it matters. +static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden, + cl::desc("Allow expected latency to exceed the critical path by N cycles " + "before attempting to balance ILP"), + cl::init(10U)); + //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// @@ -221,7 +229,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // The Scheduler may insert instructions during either schedule() or // exitRegion(), even for empty regions. So the local iterators 'I' and // 'RegionEnd' are invalid across these calls. - unsigned RemainingCount = MBB->size(); + unsigned RemainingInstrs = MBB->size(); for(MachineBasicBlock::iterator RegionEnd = MBB->end(); RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { @@ -230,19 +238,19 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { --RegionEnd; // Count the boundary instruction. - --RemainingCount; + --RemainingInstrs; } // The next region starts above the previous region. Look backward in the // instruction stream until we find the nearest boundary. MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingCount) { + for(;I != MBB->begin(); --I, --RemainingInstrs) { if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) break; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount); + Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). 
if (I == RegionEnd || I == llvm::prior(RegionEnd)) { @@ -252,11 +260,11 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { continue; } DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(dbgs() << MF->getFunction()->getName() + DEBUG(dbgs() << MF->getName() << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; - dbgs() << " Remaining: " << RemainingCount << "\n"); + dbgs() << " Remaining: " << RemainingInstrs << "\n"); // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. @@ -269,7 +277,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // scheduler for the top of it's scheduled region. RegionEnd = Scheduler->begin(); } - assert(RemainingCount == 0 && "Instruction count mismatch!"); + assert(RemainingInstrs == 0 && "Instruction count mismatch!"); Scheduler->finishBlock(); } Scheduler->finalizeSchedule(); @@ -281,157 +289,20 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const { // unimplemented } -//===----------------------------------------------------------------------===// -// MachineSchedStrategy - Interface to a machine scheduling algorithm. -//===----------------------------------------------------------------------===// - -namespace { -class ScheduleDAGMI; - -/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected -/// scheduling algorithm. -/// -/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it -/// in ScheduleDAGInstrs.h -class MachineSchedStrategy { -public: - virtual ~MachineSchedStrategy() {} - - /// Initialize the strategy after building the DAG for a new region. - virtual void initialize(ScheduleDAGMI *DAG) = 0; - - /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to - /// schedule the node at the top of the unscheduled region. Otherwise it will - /// be scheduled at the bottom. 
- virtual SUnit *pickNode(bool &IsTopNode) = 0; - - /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled a node. - virtual void schedNode(SUnit *SU, bool IsTopNode) = 0; - - /// When all predecessor dependencies have been resolved, free this node for - /// top-down scheduling. - virtual void releaseTopNode(SUnit *SU) = 0; - /// When all successor dependencies have been resolved, free this node for - /// bottom-up scheduling. - virtual void releaseBottomNode(SUnit *SU) = 0; -}; -} // namespace +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ReadyQueue::dump() { + dbgs() << Name << ": "; + for (unsigned i = 0, e = Queue.size(); i < e; ++i) + dbgs() << Queue[i]->NodeNum << " "; + dbgs() << "\n"; +} +#endif //===----------------------------------------------------------------------===// // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals // preservation. //===----------------------------------------------------------------------===// -namespace { -/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules -/// machine instructions while updating LiveIntervals. -class ScheduleDAGMI : public ScheduleDAGInstrs { - AliasAnalysis *AA; - RegisterClassInfo *RegClassInfo; - MachineSchedStrategy *SchedImpl; - - MachineBasicBlock::iterator LiveRegionEnd; - - /// Register pressure in this region computed by buildSchedGraph. - IntervalPressure RegPressure; - RegPressureTracker RPTracker; - - /// List of pressure sets that exceed the target's pressure limit before - /// scheduling, listed in increasing set ID order. Each pressure set is paired - /// with its max pressure in the currently scheduled regions. - std::vector<PressureElement> RegionCriticalPSets; - - /// The top of the unscheduled zone. - MachineBasicBlock::iterator CurrentTop; - IntervalPressure TopPressure; - RegPressureTracker TopRPTracker; - - /// The bottom of the unscheduled zone. 
- MachineBasicBlock::iterator CurrentBottom; - IntervalPressure BotPressure; - RegPressureTracker BotRPTracker; - -#ifndef NDEBUG - /// The number of instructions scheduled so far. Used to cut off the - /// scheduler at the point determined by misched-cutoff. - unsigned NumInstrsScheduled; -#endif -public: - ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): - ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), - AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), - RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure), - CurrentBottom(), BotRPTracker(BotPressure) { -#ifndef NDEBUG - NumInstrsScheduled = 0; -#endif - } - - ~ScheduleDAGMI() { - delete SchedImpl; - } - - MachineBasicBlock::iterator top() const { return CurrentTop; } - MachineBasicBlock::iterator bottom() const { return CurrentBottom; } - - /// Implement the ScheduleDAGInstrs interface for handling the next scheduling - /// region. This covers all instructions in a block, while schedule() may only - /// cover a subset. - void enterRegion(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endcount); - - /// Implement ScheduleDAGInstrs interface for scheduling a sequence of - /// reorderable instructions. - void schedule(); - - /// Get current register pressure for the top scheduled instructions. - const IntervalPressure &getTopPressure() const { return TopPressure; } - const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } - - /// Get current register pressure for the bottom scheduled instructions. - const IntervalPressure &getBotPressure() const { return BotPressure; } - const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } - - /// Get register pressure for the entire scheduling region before scheduling. 
- const IntervalPressure &getRegPressure() const { return RegPressure; } - - const std::vector<PressureElement> &getRegionCriticalPSets() const { - return RegionCriticalPSets; - } - - /// getIssueWidth - Return the max instructions per scheduling group. - unsigned getIssueWidth() const { - return (InstrItins && InstrItins->SchedModel) - ? InstrItins->SchedModel->IssueWidth : 1; - } - - /// getNumMicroOps - Return the number of issue slots required for this MI. - unsigned getNumMicroOps(MachineInstr *MI) const { - if (!InstrItins) return 1; - int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass()); - return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI); - } - -protected: - void initRegPressure(); - void updateScheduledPressure(std::vector<unsigned> NewMaxPressure); - - void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); - bool checkSchedLimit(); - - void releaseRoots(); - - void releaseSucc(SUnit *SU, SDep *SuccEdge); - void releaseSuccessors(SUnit *SU); - void releasePred(SUnit *SU, SDep *PredEdge); - void releasePredecessors(SUnit *SU); - - void placeDebugValues(); -}; -} // namespace - /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When /// NumPredsLeft reaches zero, release the successor node. /// @@ -498,7 +369,7 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI, BB->splice(InsertPos, BB, MI); // Update LiveIntervals - LIS->handleMove(MI); + LIS->handleMove(MI, /*UpdateFlags=*/true); // Recede RegionBegin if an instruction moves above the first. 
if (RegionBegin == InsertPos) @@ -565,6 +436,9 @@ void ScheduleDAGMI::initRegPressure() { std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure; for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { unsigned Limit = TRI->getRegPressureSetLimit(i); + DEBUG(dbgs() << TRI->getRegPressureSetName(i) + << "Limit " << Limit + << " Actual " << RegionPressure[i] << "\n"); if (RegionPressure[i] > Limit) RegionCriticalPSets.push_back(PressureElement(i, 0)); } @@ -587,6 +461,74 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) { } } +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. [RegionBegin, RegionEnd) +/// only includes instructions that have DAG nodes, not scheduling boundaries. +/// +/// This is a skeletal driver, with all the functionality pushed into helpers, +/// so that it can be easilly extended by experimental schedulers. Generally, +/// implementing MachineSchedStrategy should be sufficient to implement a new +/// scheduling algorithm. However, if a scheduler further subclasses +/// ScheduleDAGMI then it will want to override this virtual method in order to +/// update any specialized state. 
+void ScheduleDAGMI::schedule() { + buildDAGWithRegPressure(); + + postprocessDAG(); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + if (ViewMISchedDAGs) viewGraph(); + + initQueues(); + + bool IsTopNode = false; + while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + assert(!SU->isScheduled && "Node already scheduled"); + if (!checkSchedLimit()) + break; + + scheduleMI(SU, IsTopNode); + + updateQueues(SU, IsTopNode); + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); + + DEBUG({ + unsigned BBNum = top()->getParent()->getNumber(); + dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); +} + +/// Build the DAG and setup three register pressure trackers. +void ScheduleDAGMI::buildDAGWithRegPressure() { + // Initialize the register pressure tracker used by buildSchedGraph. + RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + + // Account for liveness generate by the region boundary. + if (LiveRegionEnd != RegionEnd) + RPTracker.recede(); + + // Build the DAG, and compute current register pressure. + buildSchedGraph(AA, &RPTracker); + if (ViewMISchedDAGs) viewGraph(); + + // Initialize top/bottom trackers after computing region pressure. + initRegPressure(); +} + +/// Apply each ScheduleDAGMutation step in order. +void ScheduleDAGMI::postprocessDAG() { + for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { + Mutations[i]->apply(this); + } +} + // Release all DAG roots for scheduling. void ScheduleDAGMI::releaseRoots() { SmallVector<SUnit*, 16> BotRoots; @@ -607,28 +549,10 @@ void ScheduleDAGMI::releaseRoots() { SchedImpl->releaseBottomNode(*I); } -/// schedule - Called back from MachineScheduler::runOnMachineFunction -/// after setting up the current scheduling region. [RegionBegin, RegionEnd) -/// only includes instructions that have DAG nodes, not scheduling boundaries. 
-void ScheduleDAGMI::schedule() { - // Initialize the register pressure tracker used by buildSchedGraph. - RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); - - // Account for liveness generate by the region boundary. - if (LiveRegionEnd != RegionEnd) - RPTracker.recede(); - - // Build the DAG, and compute current register pressure. - buildSchedGraph(AA, &RPTracker); - - // Initialize top/bottom trackers after computing region pressure. - initRegPressure(); - - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); - - if (ViewMISchedDAGs) viewGraph(); +/// Identify DAG roots and setup scheduler queues. +void ScheduleDAGMI::initQueues() { + // Initialize the strategy before modifying the DAG. SchedImpl->initialize(this); // Release edges from the special Entry node or to the special Exit node. @@ -638,61 +562,64 @@ void ScheduleDAGMI::schedule() { // Release all DAG roots for scheduling. releaseRoots(); + SchedImpl->registerRoots(); + CurrentTop = nextIfDebug(RegionBegin, RegionEnd); CurrentBottom = RegionEnd; - bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { - if (!checkSchedLimit()) - break; - - // Move the instruction to its new location in the instruction stream. - MachineInstr *MI = SU->getInstr(); - - if (IsTopNode) { - assert(SU->isTopReady() && "node still has unscheduled dependencies"); - if (&*CurrentTop == MI) - CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); - else { - moveInstruction(MI, CurrentTop); - TopRPTracker.setPos(MI); - } +} - // Update top scheduled pressure. - TopRPTracker.advance(); - assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); - updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); +/// Move an instruction and update register pressure. +void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { + // Move the instruction to its new location in the instruction stream. 
+ MachineInstr *MI = SU->getInstr(); - // Release dependent instructions for scheduling. - releaseSuccessors(SU); + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); + else { + moveInstruction(MI, CurrentTop); + TopRPTracker.setPos(MI); } + + // Update top scheduled pressure. + TopRPTracker.advance(); + assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); + } + else { + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + MachineBasicBlock::iterator priorII = + priorNonDebug(CurrentBottom, CurrentTop); + if (&*priorII == MI) + CurrentBottom = priorII; else { - assert(SU->isBottomReady() && "node still has unscheduled dependencies"); - MachineBasicBlock::iterator priorII = - priorNonDebug(CurrentBottom, CurrentTop); - if (&*priorII == MI) - CurrentBottom = priorII; - else { - if (&*CurrentTop == MI) { - CurrentTop = nextIfDebug(++CurrentTop, priorII); - TopRPTracker.setPos(CurrentTop); - } - moveInstruction(MI, CurrentBottom); - CurrentBottom = MI; + if (&*CurrentTop == MI) { + CurrentTop = nextIfDebug(++CurrentTop, priorII); + TopRPTracker.setPos(CurrentTop); } - // Update bottom scheduled pressure. - BotRPTracker.recede(); - assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); - updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); - - // Release dependent instructions for scheduling. - releasePredecessors(SU); + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; } - SU->isScheduled = true; - SchedImpl->schedNode(SU, IsTopNode); + // Update bottom scheduled pressure. 
+ BotRPTracker.recede(); + assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); } - assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); +} - placeDebugValues(); +/// Update scheduler queues after scheduling an instruction. +void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { + // Release dependent instructions for scheduling. + if (IsTopNode) + releaseSuccessors(SU); + else + releasePredecessors(SU); + + SU->isScheduled = true; + + // Notify the scheduling strategy after updating the DAG. + SchedImpl->schedNode(SU, IsTopNode); } /// Reinsert any remaining debug_values, just like the PostRA scheduler. @@ -716,91 +643,146 @@ void ScheduleDAGMI::placeDebugValues() { FirstDbgValue = NULL; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGMI::dumpSchedule() const { + for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { + if (SUnit *SU = getSUnit(&(*MI))) + SU->dump(this); + else + dbgs() << "Missing SUnit\n"; + } +} +#endif + //===----------------------------------------------------------------------===// // ConvergingScheduler - Implementation of the standard MachineSchedStrategy. //===----------------------------------------------------------------------===// namespace { -/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience -/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified -/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in. -class ReadyQueue { - unsigned ID; - std::string Name; - std::vector<SUnit*> Queue; - +/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// the schedule. 
+class ConvergingScheduler : public MachineSchedStrategy { public: - ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {} - - unsigned getID() const { return ID; } - - StringRef getName() const { return Name; } - - // SU is in this queue if it's NodeQueueID is a superset of this ID. - bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); } - - bool empty() const { return Queue.empty(); } - - unsigned size() const { return Queue.size(); } - - typedef std::vector<SUnit*>::iterator iterator; + /// Represent the type of SchedCandidate found within a single queue. + /// pickNodeBidirectional depends on these listed by decreasing priority. + enum CandReason { + NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand, + BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, + SingleMax, MultiPressure, NextDefUse, NodeOrder}; - iterator begin() { return Queue.begin(); } +#ifndef NDEBUG + static const char *getReasonStr(ConvergingScheduler::CandReason Reason); +#endif - iterator end() { return Queue.end(); } + /// Policy for scheduling the next instruction in the candidate's zone. + struct CandPolicy { + bool ReduceLatency; + unsigned ReduceResIdx; + unsigned DemandResIdx; - iterator find(SUnit *SU) { - return std::find(Queue.begin(), Queue.end(), SU); - } + CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} + }; - void push(SUnit *SU) { - Queue.push_back(SU); - SU->NodeQueueId |= ID; - } + /// Status of an instruction's critical resource consumption. + struct SchedResourceDelta { + // Count critical resources in the scheduled region required by SU. + unsigned CritResources; - void remove(iterator I) { - (*I)->NodeQueueId &= ~ID; - *I = Queue.back(); - Queue.pop_back(); - } + // Count critical resources from another region consumed by SU. 
+ unsigned DemandedResources; - void dump() { - dbgs() << Name << ": "; - for (unsigned i = 0, e = Queue.size(); i < e; ++i) - dbgs() << Queue[i]->NodeNum << " "; - dbgs() << "\n"; - } -}; + SchedResourceDelta(): CritResources(0), DemandedResources(0) {} -/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance -/// the schedule. -class ConvergingScheduler : public MachineSchedStrategy { + bool operator==(const SchedResourceDelta &RHS) const { + return CritResources == RHS.CritResources + && DemandedResources == RHS.DemandedResources; + } + bool operator!=(const SchedResourceDelta &RHS) const { + return !operator==(RHS); + } + }; /// Store the state used by ConvergingScheduler heuristics, required for the /// lifetime of one invocation of pickNode(). struct SchedCandidate { + CandPolicy Policy; + // The best SUnit candidate. SUnit *SU; + // The reason for this candidate. + CandReason Reason; + // Register pressure values for the best candidate. RegPressureDelta RPDelta; - SchedCandidate(): SU(NULL) {} + // Critical resource consumption of the best candidate. + SchedResourceDelta ResDelta; + + SchedCandidate(const CandPolicy &policy) + : Policy(policy), SU(NULL), Reason(NoCand) {} + + bool isValid() const { return SU; } + + // Copy the status of another candidate without changing policy. + void setBest(SchedCandidate &Best) { + assert(Best.Reason != NoCand && "uninitialized Sched candidate"); + SU = Best.SU; + Reason = Best.Reason; + RPDelta = Best.RPDelta; + ResDelta = Best.ResDelta; + } + + void initResourceDelta(const ScheduleDAGMI *DAG, + const TargetSchedModel *SchedModel); + }; + + /// Summarize the unscheduled region. + struct SchedRemainder { + // Critical path through the DAG in expected latency. + unsigned CriticalPath; + + // Unscheduled resources + SmallVector<unsigned, 16> RemainingCounts; + // Critical resource for the unscheduled zone. + unsigned CritResIdx; + // Number of micro-ops left to schedule. 
+ unsigned RemainingMicroOps; + // Is the unscheduled zone resource limited. + bool IsResourceLimited; + + unsigned MaxRemainingCount; + + void reset() { + CriticalPath = 0; + RemainingCounts.clear(); + CritResIdx = 0; + RemainingMicroOps = 0; + IsResourceLimited = false; + MaxRemainingCount = 0; + } + + SchedRemainder() { reset(); } + + void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); }; - /// Represent the type of SchedCandidate found within a single queue. - enum CandResult { - NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure }; /// Each Scheduling boundary is associated with ready queues. It tracks the - /// current cycle in whichever direction at has moved, and maintains the state + /// current cycle in the direction of movement, and maintains the state /// of "hazards" and other interlocks at the current cycle. struct SchedBoundary { ScheduleDAGMI *DAG; + const TargetSchedModel *SchedModel; + SchedRemainder *Rem; ReadyQueue Available; ReadyQueue Pending; bool CheckPending; + // For heuristics, keep a list of the nodes that immediately depend on the + // most recently scheduled node. + SmallPtrSet<const SUnit*, 8> NextSUs; + ScheduleHazardRecognizer *HazardRec; unsigned CurrCycle; @@ -809,29 +791,88 @@ class ConvergingScheduler : public MachineSchedStrategy { /// MinReadyCycle - Cycle of the soonest available instruction. unsigned MinReadyCycle; + // The expected latency of the critical path in this scheduled zone. + unsigned ExpectedLatency; + + // Resources used in the scheduled zone beyond this boundary. + SmallVector<unsigned, 16> ResourceCounts; + + // Cache the critical resources ID in this scheduled zone. + unsigned CritResIdx; + + // Is the scheduled region resource limited vs. latency limited. + bool IsResourceLimited; + + unsigned ExpectedCount; + + // Policy flag: attempt to find ILP until expected latency is covered. + bool ShouldIncreaseILP; + +#ifndef NDEBUG // Remember the greatest min operand latency. 
unsigned MaxMinLatency; +#endif + + void reset() { + Available.clear(); + Pending.clear(); + CheckPending = false; + NextSUs.clear(); + HazardRec = 0; + CurrCycle = 0; + IssueCount = 0; + MinReadyCycle = UINT_MAX; + ExpectedLatency = 0; + ResourceCounts.resize(1); + assert(!ResourceCounts[0] && "nonzero count for bad resource"); + CritResIdx = 0; + IsResourceLimited = false; + ExpectedCount = 0; + ShouldIncreaseILP = false; +#ifndef NDEBUG + MaxMinLatency = 0; +#endif + // Reserve a zero-count for invalid CritResIdx. + ResourceCounts.resize(1); + } /// Pending queues extend the ready queues with the same ID and the /// PendingFlag set. SchedBoundary(unsigned ID, const Twine &Name): - DAG(0), Available(ID, Name+".A"), - Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), - CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0), - MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} + DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), + Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") { + reset(); + } ~SchedBoundary() { delete HazardRec; } + void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, + SchedRemainder *rem); + bool isTop() const { return Available.getID() == ConvergingScheduler::TopQID; } + unsigned getUnscheduledLatency(SUnit *SU) const { + if (isTop()) + return SU->getHeight(); + return SU->getDepth(); + } + + unsigned getCriticalCount() const { + return ResourceCounts[CritResIdx]; + } + bool checkHazard(SUnit *SU); + void checkILPPolicy(); + void releaseNode(SUnit *SU, unsigned ReadyCycle); void bumpCycle(); + void countResource(unsigned PIdx, unsigned Cycles); + void bumpNode(SUnit *SU); void releasePending(); @@ -841,10 +882,13 @@ class ConvergingScheduler : public MachineSchedStrategy { SUnit *pickOnlyChoice(); }; +private: ScheduleDAGMI *DAG; + const TargetSchedModel *SchedModel; const TargetRegisterInfo *TRI; // State of the top and bottom scheduled instruction boundaries. 
+ SchedRemainder Rem; SchedBoundary Top; SchedBoundary Bot; @@ -857,7 +901,7 @@ public: }; ConvergingScheduler(): - DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} virtual void initialize(ScheduleDAGMI *dag); @@ -869,28 +913,80 @@ public: virtual void releaseBottomNode(SUnit *SU); + virtual void registerRoots(); + protected: - SUnit *pickNodeBidrectional(bool &IsTopNode); + void balanceZones( + ConvergingScheduler::SchedBoundary &CriticalZone, + ConvergingScheduler::SchedCandidate &CriticalCand, + ConvergingScheduler::SchedBoundary &OppositeZone, + ConvergingScheduler::SchedCandidate &OppositeCand); + + void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, + ConvergingScheduler::SchedCandidate &BotCand); + + void tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker); + + SUnit *pickNodeBidirectional(bool &IsTopNode); + + void pickNodeFromQueue(SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate); - CandResult pickNodeFromQueue(ReadyQueue &Q, - const RegPressureTracker &RPTracker, - SchedCandidate &Candidate); #ifndef NDEBUG - void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, - PressureElement P = PressureElement()); + void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone); #endif }; } // namespace +void ConvergingScheduler::SchedRemainder:: +init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { + reset(); + if (!SchedModel->hasInstrSchedModel()) + return; + RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); + for (std::vector<SUnit>::iterator + I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { + const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); + RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); + for 
(TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + unsigned Factor = SchedModel->getResourceFactor(PIdx); + RemainingCounts[PIdx] += (Factor * PI->Cycles); + } + } +} + +void ConvergingScheduler::SchedBoundary:: +init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { + reset(); + DAG = dag; + SchedModel = smodel; + Rem = rem; + if (SchedModel->hasInstrSchedModel()) + ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); +} + void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { DAG = dag; + SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; - Top.DAG = dag; - Bot.DAG = dag; + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + Bot.init(DAG, SchedModel, &Rem); + + // Initialize resource counts. - // Initialize the HazardRecognizers. + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. 
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); const TargetMachine &TM = DAG->MF.getTarget(); - const InstrItineraryData *Itin = TM.getInstrItineraryData(); Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); @@ -905,13 +1001,12 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned Latency = - DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true); + unsigned MinLatency = I->getMinLatency(); #ifndef NDEBUG - Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency); + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); #endif - if (SU->TopReadyCycle < PredReadyCycle + Latency) - SU->TopReadyCycle = PredReadyCycle + Latency; + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; } Top.releaseNode(SU, SU->TopReadyCycle); } @@ -925,17 +1020,27 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned Latency = - DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true); + unsigned MinLatency = I->getMinLatency(); #ifndef NDEBUG - Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency); + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); #endif - if (SU->BotReadyCycle < SuccReadyCycle + Latency) - SU->BotReadyCycle = SuccReadyCycle + Latency; + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; } Bot.releaseNode(SU, SU->BotReadyCycle); } +void ConvergingScheduler::registerRoots() { + Rem.CriticalPath = DAG->ExitSU.getDepth(); + // Some roots may not feed into ExitSU. Check all of them in case. 
+ for (std::vector<SUnit*>::const_iterator + I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { + if ((*I)->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = (*I)->getDepth(); + } + DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); +} + /// Does this SU have a hazard within the current instruction group. /// /// The scheduler supports two modes of hazard recognition. The first is the @@ -953,14 +1058,27 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { if (HazardRec->isEnabled()) return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; - if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth()) + unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); + if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) { + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" + << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; - + } return false; } +/// If expected latency is covered, disable ILP policy. +void ConvergingScheduler::SchedBoundary::checkILPPolicy() { + if (ShouldIncreaseILP + && (IsResourceLimited || ExpectedLatency <= CurrCycle)) { + ShouldIncreaseILP = false; + DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n'); + } +} + void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; @@ -970,15 +1088,31 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, Pending.push(SU); else Available.push(SU); + + // Record this node as an immediate dependent of the scheduled node. + NextSUs.insert(SU); + + // If CriticalPath has been computed, then check if the unscheduled nodes + // exceed the ILP window. Before registerRoots, CriticalPath==0. 
+ if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU) + > Rem->CriticalPath + ILPWindow)) { + ShouldIncreaseILP = true; + DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " " + << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n'); + } } /// Move the boundary of scheduled code by one cycle. void ConvergingScheduler::SchedBoundary::bumpCycle() { - unsigned Width = DAG->getIssueWidth(); + unsigned Width = SchedModel->getIssueWidth(); IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; + unsigned NextCycle = CurrCycle + 1; assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); - unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); + if (MinReadyCycle > NextCycle) { + IssueCount = 0; + NextCycle = MinReadyCycle; + } if (!HazardRec->isEnabled()) { // Bypass HazardRec virtual calls. @@ -994,11 +1128,39 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() { } } CheckPending = true; + IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); - DEBUG(dbgs() << "*** " << Available.getName() << " cycle " + DEBUG(dbgs() << " *** " << Available.getName() << " cycle " << CurrCycle << '\n'); } +/// Add the given processor resource to this scheduled zone. +void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, + unsigned Cycles) { + unsigned Factor = SchedModel->getResourceFactor(PIdx); + DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name + << " +(" << Cycles << "x" << Factor + << ") / " << SchedModel->getLatencyFactor() << '\n'); + + unsigned Count = Factor * Cycles; + ResourceCounts[PIdx] += Count; + assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); + Rem->RemainingCounts[PIdx] -= Count; + + // Reset MaxRemainingCount for sanity. + Rem->MaxRemainingCount = 0; + + // Check if this resource exceeds the current critical resource by a full + // cycle. If so, it becomes the critical resource. 
+ if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) + >= (int)SchedModel->getLatencyFactor()) { + CritResIdx = PIdx; + DEBUG(dbgs() << " *** Critical resource " + << SchedModel->getProcResource(PIdx)->Name << " x" + << ResourceCounts[PIdx] << '\n'); + } +} + /// Move the boundary of scheduled code by one SUnit. void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { // Update the reservation table. @@ -1010,11 +1172,38 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { } HazardRec->EmitInstruction(SU); } + // Update resource counts and critical resource. + if (SchedModel->hasInstrSchedModel()) { + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + countResource(PI->ProcResourceIdx, PI->Cycles); + } + } + if (isTop()) { + if (SU->getDepth() > ExpectedLatency) + ExpectedLatency = SU->getDepth(); + } + else { + if (SU->getHeight() > ExpectedLatency) + ExpectedLatency = SU->getHeight(); + } + + IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); + // Check the instruction group dispatch limit. // TODO: Check if this SU must end a dispatch group. - IssueCount += DAG->getNumMicroOps(SU->getInstr()); - if (IssueCount >= DAG->getIssueWidth()) { - DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); + IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); + + // checkHazard prevents scheduling multiple instructions per cycle that exceed + // issue width. However, we commonly reach the maximum. In this case + // opportunistically bump the cycle to avoid uselessly checking everything in + // the readyQ. Furthermore, a single instruction may produce more than one + // cycle's worth of micro-ops. 
+ if (IssueCount >= SchedModel->getIssueWidth()) { + DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n'); bumpCycle(); } } @@ -1045,6 +1234,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() { Pending.remove(Pending.begin()+i); --i; --e; } + DEBUG(if (!Pending.empty()) Pending.dump()); CheckPending = false; } @@ -1059,12 +1249,23 @@ void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) { } /// If this queue only has one ready candidate, return it. As a side effect, -/// advance the cycle until at least one node is ready. If multiple instructions -/// are ready, return NULL. +/// defer any nodes that now hit a hazard, and advance the cycle until at least +/// one node is ready. If multiple instructions are ready, return NULL. SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); + if (IssueCount > 0) { + // Defer any ready instrs that now have a hazard. + for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { + if (checkHazard(*I)) { + Pending.push(*I); + I = Available.remove(I); + continue; + } + ++I; + } + } for (unsigned i = 0; Available.empty(); ++i) { assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && "permanent hazard"); (void)i; @@ -1076,18 +1277,262 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { return NULL; } -#ifndef NDEBUG -void ConvergingScheduler::traceCandidate(const char *Label, const ReadyQueue &Q, - SUnit *SU, PressureElement P) { - dbgs() << Label << " " << Q.getName() << " "; - if (P.isValid()) - dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease - << " "; - else - dbgs() << " "; - SU->dump(DAG); +/// Record the candidate policy for opposite zones with different critical +/// resources. +/// +/// If the CriticalZone is latency limited, don't force a policy for the +/// candidates here. 
Instead, When releasing each candidate, releaseNode +/// compares the region's critical path to the candidate's height or depth and +/// the scheduled zone's expected latency then sets ShouldIncreaseILP. +void ConvergingScheduler::balanceZones( + ConvergingScheduler::SchedBoundary &CriticalZone, + ConvergingScheduler::SchedCandidate &CriticalCand, + ConvergingScheduler::SchedBoundary &OppositeZone, + ConvergingScheduler::SchedCandidate &OppositeCand) { + + if (!CriticalZone.IsResourceLimited) + return; + + SchedRemainder *Rem = CriticalZone.Rem; + + // If the critical zone is overconsuming a resource relative to the + // remainder, try to reduce it. + unsigned RemainingCritCount = + Rem->RemainingCounts[CriticalZone.CritResIdx]; + if ((int)(Rem->MaxRemainingCount - RemainingCritCount) + > (int)SchedModel->getLatencyFactor()) { + CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; + DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce " + << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name + << '\n'); + } + // If the other zone is underconsuming a resource relative to the full zone, + // try to increase it. + unsigned OppositeCount = + OppositeZone.ResourceCounts[CriticalZone.CritResIdx]; + if ((int)(OppositeZone.ExpectedCount - OppositeCount) + > (int)SchedModel->getLatencyFactor()) { + OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx; + DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand " + << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name + << '\n'); + } +} + +/// Determine if the scheduled zones exceed resource limits or critical path and +/// set each candidate's ReduceHeight policy accordingly. 
+void ConvergingScheduler::checkResourceLimits( + ConvergingScheduler::SchedCandidate &TopCand, + ConvergingScheduler::SchedCandidate &BotCand) { + + Bot.checkILPPolicy(); + Top.checkILPPolicy(); + if (Bot.ShouldIncreaseILP) + BotCand.Policy.ReduceLatency = true; + if (Top.ShouldIncreaseILP) + TopCand.Policy.ReduceLatency = true; + + // Handle resource-limited regions. + if (Top.IsResourceLimited && Bot.IsResourceLimited + && Top.CritResIdx == Bot.CritResIdx) { + // If the scheduled critical resource in both zones is no longer the + // critical remaining resource, attempt to reduce resource height both ways. + if (Top.CritResIdx != Rem.CritResIdx) { + TopCand.Policy.ReduceResIdx = Top.CritResIdx; + BotCand.Policy.ReduceResIdx = Bot.CritResIdx; + DEBUG(dbgs() << "Reduce scheduled " + << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n'); + } + return; + } + // Handle latency-limited regions. + if (!Top.IsResourceLimited && !Bot.IsResourceLimited) { + // If the total scheduled expected latency exceeds the region's critical + // path then reduce latency both ways. + // + // Just because a zone is not resource limited does not mean it is latency + // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle + // to exceed expected latency. + if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath) + && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) { + TopCand.Policy.ReduceLatency = true; + BotCand.Policy.ReduceLatency = true; + DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency + << " + " << Bot.ExpectedLatency << '\n'); + } + return; + } + // The critical resource is different in each zone, so request balancing. + + // Compute the cost of each zone. 
+ Rem.MaxRemainingCount = std::max( + Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(), + Rem.RemainingCounts[Rem.CritResIdx]); + Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle); + Top.ExpectedCount = std::max( + Top.getCriticalCount(), + Top.ExpectedCount * SchedModel->getLatencyFactor()); + Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle); + Bot.ExpectedCount = std::max( + Bot.getCriticalCount(), + Bot.ExpectedCount * SchedModel->getLatencyFactor()); + + balanceZones(Top, TopCand, Bot, BotCand); + balanceZones(Bot, BotCand, Top, TopCand); +} + +void ConvergingScheduler::SchedCandidate:: +initResourceDelta(const ScheduleDAGMI *DAG, + const TargetSchedModel *SchedModel) { + if (!Policy.ReduceResIdx && !Policy.DemandResIdx) + return; + + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + if (PI->ProcResourceIdx == Policy.ReduceResIdx) + ResDelta.CritResources += PI->Cycles; + if (PI->ProcResourceIdx == Policy.DemandResIdx) + ResDelta.DemandedResources += PI->Cycles; + } +} + +/// Return true if this heuristic determines order. 
+static bool tryLess(unsigned TryVal, unsigned CandVal, + ConvergingScheduler::SchedCandidate &TryCand, + ConvergingScheduler::SchedCandidate &Cand, + ConvergingScheduler::CandReason Reason) { + if (TryVal < CandVal) { + TryCand.Reason = Reason; + return true; + } + if (TryVal > CandVal) { + if (Cand.Reason > Reason) + Cand.Reason = Reason; + return true; + } + return false; +} +static bool tryGreater(unsigned TryVal, unsigned CandVal, + ConvergingScheduler::SchedCandidate &TryCand, + ConvergingScheduler::SchedCandidate &Cand, + ConvergingScheduler::CandReason Reason) { + if (TryVal > CandVal) { + TryCand.Reason = Reason; + return true; + } + if (TryVal < CandVal) { + if (Cand.Reason > Reason) + Cand.Reason = Reason; + return true; + } + return false; +} + +/// Apply a set of heursitics to a new candidate. Heuristics are currently +/// hierarchical. This may be more efficient than a graduated cost model because +/// we don't need to evaluate all aspects of the model for each node in the +/// queue. But it's really done to make the heuristics easier to debug and +/// statistically analyze. +/// +/// \param Cand provides the policy and current best candidate. +/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. +/// \param Zone describes the scheduled zone that we are extending. +/// \param RPTracker describes reg pressure within the scheduled zone. +/// \param TempTracker is a scratch pressure tracker to reuse in queries. +void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker) { + + // Always initialize TryCand's RPDelta. + TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + + // Initialize the candidate if needed. 
+ if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; + return; + } + // Avoid exceeding the target's limit. + if (tryLess(TryCand.RPDelta.Excess.UnitIncrease, + Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess)) + return; + if (Cand.Reason == SingleExcess) + Cand.Reason = MultiPressure; + + // Avoid increasing the max critical pressure in the scheduled region. + if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease, + Cand.RPDelta.CriticalMax.UnitIncrease, + TryCand, Cand, SingleCritical)) + return; + if (Cand.Reason == SingleCritical) + Cand.Reason = MultiPressure; + + // Avoid critical resource consumption and balance the schedule. + TryCand.initResourceDelta(DAG, SchedModel); + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, + TryCand, Cand, ResourceDemand)) + return; + + // Avoid serializing long latency dependence chains. + if (Cand.Policy.ReduceLatency) { + if (Zone.isTop()) { + if (Cand.SU->getDepth() * SchedModel->getLatencyFactor() + > Zone.ExpectedCount) { + if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, TopDepthReduce)) + return; + } + if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, TopPathReduce)) + return; + } + else { + if (Cand.SU->getHeight() * SchedModel->getLatencyFactor() + > Zone.ExpectedCount) { + if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, BotHeightReduce)) + return; + } + if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, BotPathReduce)) + return; + } + } + + // Avoid increasing the max pressure of the entire region. 
+ if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease, + Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax)) + return; + if (Cand.Reason == SingleMax) + Cand.Reason = MultiPressure; + + // Prefer immediate defs/users of the last scheduled instruction. This is a + // nice pressure avoidance strategy that also conserves the processor's + // register renaming resources and keeps the machine code readable. + if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) { + TryCand.Reason = NextDefUse; + return; + } + if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) { + if (Cand.Reason > NextDefUse) + Cand.Reason = NextDefUse; + return; + } + // Fall through to original instruction order. + if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) + || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { + TryCand.Reason = NodeOrder; + } } -#endif /// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is /// more desirable than RHS from scheduling standpoint. @@ -1098,109 +1543,144 @@ static bool compareRPDelta(const RegPressureDelta &LHS, // have UnitIncrease==0, so are neutral. // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) + if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { + DEBUG(dbgs() << "RP excess top - bot: " + << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; - + } // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) + if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { + DEBUG(dbgs() << "RP critical top - bot: " + << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) + << '\n'); return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; - + } // Avoid increasing the max pressure of the entire region. 
- if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) + if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { + DEBUG(dbgs() << "RP current top - bot: " + << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) + << '\n'); return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; - + } return false; } +#ifndef NDEBUG +const char *ConvergingScheduler::getReasonStr( + ConvergingScheduler::CandReason Reason) { + switch (Reason) { + case NoCand: return "NOCAND "; + case SingleExcess: return "REG-EXCESS"; + case SingleCritical: return "REG-CRIT "; + case SingleMax: return "REG-MAX "; + case MultiPressure: return "REG-MULTI "; + case ResourceReduce: return "RES-REDUCE"; + case ResourceDemand: return "RES-DEMAND"; + case TopDepthReduce: return "TOP-DEPTH "; + case TopPathReduce: return "TOP-PATH "; + case BotHeightReduce:return "BOT-HEIGHT"; + case BotPathReduce: return "BOT-PATH "; + case NextDefUse: return "DEF-USE "; + case NodeOrder: return "ORDER "; + }; + llvm_unreachable("Unknown reason!"); +} + +void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand, + const SchedBoundary &Zone) { + const char *Label = getReasonStr(Cand.Reason); + PressureElement P; + unsigned ResIdx = 0; + unsigned Latency = 0; + switch (Cand.Reason) { + default: + break; + case SingleExcess: + P = Cand.RPDelta.Excess; + break; + case SingleCritical: + P = Cand.RPDelta.CriticalMax; + break; + case SingleMax: + P = Cand.RPDelta.CurrentMax; + break; + case ResourceReduce: + ResIdx = Cand.Policy.ReduceResIdx; + break; + case ResourceDemand: + ResIdx = Cand.Policy.DemandResIdx; + break; + case TopDepthReduce: + Latency = Cand.SU->getDepth(); + break; + case TopPathReduce: + Latency = Cand.SU->getHeight(); + break; + case BotHeightReduce: + Latency = Cand.SU->getHeight(); + break; + case BotPathReduce: + Latency = Cand.SU->getDepth(); + break; + } + dbgs() << Label << " " << Zone.Available.getName() << " "; + if (P.isValid()) + dbgs() << 
TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease + << " "; + else + dbgs() << " "; + if (ResIdx) + dbgs() << SchedModel->getProcResource(ResIdx)->Name << " "; + else + dbgs() << " "; + if (Latency) + dbgs() << Latency << " cycles "; + else + dbgs() << " "; + Cand.SU->dump(DAG); +} +#endif + /// Pick the best candidate from the top queue. /// /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during /// DAG building. To adjust for the current scheduling location we need to /// maintain the number of vreg uses remaining to be top-scheduled. -ConvergingScheduler::CandResult ConvergingScheduler:: -pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, - SchedCandidate &Candidate) { +void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Cand) { + ReadyQueue &Q = Zone.Available; + DEBUG(Q.dump()); // getMaxPressureDelta temporarily modifies the tracker. RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); - // BestSU remains NULL if no top candidates beat the best existing candidate. - CandResult FoundCandidate = NoCand; for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { - RegPressureDelta RPDelta; - TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, - DAG->getRegionCriticalPSets(), - DAG->getRegPressure().MaxSetPressure); - - // Initialize the candidate if needed. - if (!Candidate.SU) { - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - FoundCandidate = NodeOrder; - continue; - } - // Avoid exceeding the target's limit. 
- if (RPDelta.Excess.UnitIncrease < Candidate.RPDelta.Excess.UnitIncrease) { - DEBUG(traceCandidate("ECAND", Q, *I, RPDelta.Excess)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - FoundCandidate = SingleExcess; - continue; - } - if (RPDelta.Excess.UnitIncrease > Candidate.RPDelta.Excess.UnitIncrease) - continue; - if (FoundCandidate == SingleExcess) - FoundCandidate = MultiPressure; - - // Avoid increasing the max critical pressure in the scheduled region. - if (RPDelta.CriticalMax.UnitIncrease - < Candidate.RPDelta.CriticalMax.UnitIncrease) { - DEBUG(traceCandidate("PCAND", Q, *I, RPDelta.CriticalMax)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - FoundCandidate = SingleCritical; - continue; - } - if (RPDelta.CriticalMax.UnitIncrease - > Candidate.RPDelta.CriticalMax.UnitIncrease) - continue; - if (FoundCandidate == SingleCritical) - FoundCandidate = MultiPressure; - - // Avoid increasing the max pressure of the entire region. - if (RPDelta.CurrentMax.UnitIncrease - < Candidate.RPDelta.CurrentMax.UnitIncrease) { - DEBUG(traceCandidate("MCAND", Q, *I, RPDelta.CurrentMax)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - FoundCandidate = SingleMax; - continue; - } - if (RPDelta.CurrentMax.UnitIncrease - > Candidate.RPDelta.CurrentMax.UnitIncrease) - continue; - if (FoundCandidate == SingleMax) - FoundCandidate = MultiPressure; - - // Fall through to original instruction order. - // Only consider node order if Candidate was chosen from this Q. 
- if (FoundCandidate == NoCand) - continue; - if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) - || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { - DEBUG(traceCandidate("NCAND", Q, *I)); - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - FoundCandidate = NodeOrder; + SchedCandidate TryCand(Cand.Policy); + TryCand.SU = *I; + tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker); + if (TryCand.Reason != NoCand) { + // Initialize resource delta if needed in case future heuristics query it. + if (TryCand.ResDelta == SchedResourceDelta()) + TryCand.initResourceDelta(DAG, SchedModel); + Cand.setBest(TryCand); + DEBUG(traceCandidate(Cand, Zone)); } + TryCand.SU = *I; } - return FoundCandidate; +} + +static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, + bool IsTop) { + DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot") + << " SU(" << Cand.SU->NodeNum << ") " + << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n'); } /// Pick the best candidate node from either the top or bottom queue. -SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) { +SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. This is most // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { @@ -1211,11 +1691,14 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) { IsTopNode = true; return SU; } - SchedCandidate BotCand; + CandPolicy NoPolicy; + SchedCandidate BotCand(NoPolicy); + SchedCandidate TopCand(NoPolicy); + checkResourceLimits(TopCand, BotCand); + // Prefer bottom scheduling when heuristics are silent. 
- CandResult BotResult = pickNodeFromQueue(Bot.Available, - DAG->getBotRPTracker(), BotCand); - assert(BotResult != NoCand && "failed to find the first candidate"); + pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); + assert(BotCand.Reason != NoCand && "failed to find the first candidate"); // If either Q has a single candidate that provides the least increase in // Excess pressure, we can immediately schedule from that Q. @@ -1224,37 +1707,41 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) { // affects picking from either Q. If scheduling in one direction must // increase pressure for one of the excess PSets, then schedule in that // direction first to provide more freedom in the other direction. - if (BotResult == SingleExcess || BotResult == SingleCritical) { + if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) { IsTopNode = false; + tracePick(BotCand, IsTopNode); return BotCand.SU; } // Check if the top Q has a better candidate. - SchedCandidate TopCand; - CandResult TopResult = pickNodeFromQueue(Top.Available, - DAG->getTopRPTracker(), TopCand); - assert(TopResult != NoCand && "failed to find the first candidate"); + pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); + assert(TopCand.Reason != NoCand && "failed to find the first candidate"); - if (TopResult == SingleExcess || TopResult == SingleCritical) { - IsTopNode = true; - return TopCand.SU; - } // If either Q has a single candidate that minimizes pressure above the // original region's pressure pick it. - if (BotResult == SingleMax) { + if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) { + if (TopCand.Reason < BotCand.Reason) { + IsTopNode = true; + tracePick(TopCand, IsTopNode); + return TopCand.SU; + } IsTopNode = false; + tracePick(BotCand, IsTopNode); return BotCand.SU; } - if (TopResult == SingleMax) { + // Check for a salient pressure difference and pick the best from either side. 
+ if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) { IsTopNode = true; + tracePick(TopCand, IsTopNode); return TopCand.SU; } - // Check for a salient pressure difference and pick the best from either side. - if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) { + // Otherwise prefer the bottom candidate, in node order if all else failed. + if (TopCand.Reason < BotCand.Reason) { IsTopNode = true; + tracePick(TopCand, IsTopNode); return TopCand.SU; } - // Otherwise prefer the bottom candidate in node order. IsTopNode = false; + tracePick(BotCand, IsTopNode); return BotCand.SU; } @@ -1266,33 +1753,34 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { return NULL; } SUnit *SU; - if (ForceTopDown) { - SU = Top.pickOnlyChoice(); - if (!SU) { - SchedCandidate TopCand; - CandResult TopResult = - pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand); - assert(TopResult != NoCand && "failed to find the first candidate"); - (void)TopResult; - SU = TopCand.SU; + do { + if (ForceTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + SchedCandidate TopCand(NoPolicy); + pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); + assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + SU = TopCand.SU; + } + IsTopNode = true; } - IsTopNode = true; - } - else if (ForceBottomUp) { - SU = Bot.pickOnlyChoice(); - if (!SU) { - SchedCandidate BotCand; - CandResult BotResult = - pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand); - assert(BotResult != NoCand && "failed to find the first candidate"); - (void)BotResult; - SU = BotCand.SU; + else if (ForceBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + SchedCandidate BotCand(NoPolicy); + pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); + assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + SU = BotCand.SU; + } + IsTopNode = false; } - IsTopNode = false; - } - else { - SU = 
pickNodeBidrectional(IsTopNode); - } + else { + SU = pickNodeBidirectional(IsTopNode); + } + } while (SU->isScheduled); + if (SU->isTopReady()) Top.removeReady(SU); if (SU->isBottomReady()) @@ -1331,6 +1819,86 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched); //===----------------------------------------------------------------------===// +// ILP Scheduler. Currently for experimental analysis of heuristics. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Order nodes by the ILP metric. +struct ILPOrder { + ScheduleDAGILP *ILP; + bool MaximizeILP; + + ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {} + + /// \brief Apply a less-than relation on node priority. + bool operator()(const SUnit *A, const SUnit *B) const { + // Return true if A comes after B in the Q. + if (MaximizeILP) + return ILP->getILP(A) < ILP->getILP(B); + else + return ILP->getILP(A) > ILP->getILP(B); + } +}; + +/// \brief Schedule based on the ILP metric. +class ILPScheduler : public MachineSchedStrategy { + ScheduleDAGILP ILP; + ILPOrder Cmp; + + std::vector<SUnit*> ReadyQ; +public: + ILPScheduler(bool MaximizeILP) + : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {} + + virtual void initialize(ScheduleDAGMI *DAG) { + ReadyQ.clear(); + ILP.resize(DAG->SUnits.size()); + } + + virtual void registerRoots() { + for (std::vector<SUnit*>::const_iterator + I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) { + ILP.computeILP(*I); + } + } + + /// Implement MachineSchedStrategy interface. 
+ /// ----------------------------------------- + + virtual SUnit *pickNode(bool &IsTopNode) { + if (ReadyQ.empty()) return NULL; + pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + SUnit *SU = ReadyQ.back(); + ReadyQ.pop_back(); + IsTopNode = false; + DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr() + << " ILP: " << ILP.getILP(SU) << '\n'); + return SU; + } + + virtual void schedNode(SUnit *, bool) {} + + virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } + + virtual void releaseBottomNode(SUnit *SU) { + ReadyQ.push_back(SU); + std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + } +}; +} // namespace + +static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new ILPScheduler(true)); +} +static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new ILPScheduler(false)); +} +static MachineSchedRegistry ILPMaxRegistry( + "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); +static MachineSchedRegistry ILPMinRegistry( + "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); + +//===----------------------------------------------------------------------===// // Machine Instruction Shuffler for Correctness Testing //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index bc383cb..b117f8c 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -49,7 +49,6 @@ namespace { MachineDominatorTree *DT; // Machine dominator tree MachineLoopInfo *LI; AliasAnalysis *AA; - BitVector AllocatableSet; // Which physregs are allocatable? // Remember which edges have been considered for breaking. 
SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8> @@ -229,7 +228,6 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { DT = &getAnalysis<MachineDominatorTree>(); LI = &getAnalysis<MachineLoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); - AllocatableSet = TRI->getAllocatableSet(MF); bool EverMadeChange = false; diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 1a3aa60..9686b04 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -14,9 +14,10 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/PostOrderIterator.h" @@ -50,9 +51,11 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF = &Func; TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); - ItinData = MF->getTarget().getInstrItineraryData(); MRI = &MF->getRegInfo(); Loops = &getAnalysis<MachineLoopInfo>(); + const TargetSubtargetInfo &ST = + MF->getTarget().getSubtarget<TargetSubtargetInfo>(); + SchedModel.init(*ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); return false; } @@ -674,7 +677,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); // Ignore dependencies outside the current trace. 
const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; - if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head) + if (!DefTBI.isEarlierInSameTrace(TBI)) continue; unsigned Len = LIR.Height + Cycles[DefMI].Depth; MaxLen = std::max(MaxLen, Len); @@ -737,16 +740,15 @@ computeInstrDepths(const MachineBasicBlock *MBB) { const TraceBlockInfo&DepTBI = BlockInfo[Dep.DefMI->getParent()->getNumber()]; // Ignore dependencies from outside the current trace. - if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head) + if (!DepTBI.isEarlierInSameTrace(TBI)) continue; assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) - DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData, - Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp, - /* FindMin = */ false); + DepCycle += MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp, + /* FindMin = */ false); Cycle = std::max(Cycle, DepCycle); } // Remember the instruction depth. @@ -769,7 +771,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Height is the issue height computed from virtual register dependencies alone. static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, SparseSet<LiveRegUnit> &RegUnits, - const InstrItineraryData *ItinData, + const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { SmallVector<unsigned, 8> ReadOps; @@ -792,14 +794,10 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, unsigned DepHeight = I->Cycle; if (!MI->isTransient()) { // We may not know the UseMI of this dependency, if it came from the - // live-in list. - if (I->MI) - DepHeight += TII->computeOperandLatency(ItinData, - MI, MO.getOperandNo(), - I->MI, I->Op); - else - // No UseMI. Just use the MI latency instead. 
- DepHeight += TII->getInstrLatency(ItinData, MI); + // live-in list. SchedModel can handle a NULL UseMI. + DepHeight += SchedModel + .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op, + /* FindMin = */ false); } Height = std::max(Height, DepHeight); // This regunit is dead above MI. @@ -832,12 +830,12 @@ typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap; static bool pushDepHeight(const DataDep &Dep, const MachineInstr *UseMI, unsigned UseHeight, MIHeightMap &Heights, - const InstrItineraryData *ItinData, + const TargetSchedModel &SchedModel, const TargetInstrInfo *TII) { // Adjust height by Dep.DefMI latency. if (!Dep.DefMI->isTransient()) - UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp); + UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp, false); // Update Heights[DefMI] to be the maximum height seen. MIHeightMap::iterator I; @@ -852,14 +850,14 @@ static bool pushDepHeight(const DataDep &Dep, return false; } -/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists -/// of all the blocks in Trace. Stop when reaching the block that contains -/// DefMI. +/// Assuming that the virtual register defined by DefMI:DefOp was used by +/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop +/// when reaching the block that contains DefMI. void MachineTraceMetrics::Ensemble:: -addLiveIns(const MachineInstr *DefMI, +addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef<const MachineBasicBlock*> Trace) { assert(!Trace.empty() && "Trace should contain at least one block"); - unsigned Reg = DefMI->getOperand(0).getReg(); + unsigned Reg = DefMI->getOperand(DefOp).getReg(); assert(TargetRegisterInfo::isVirtualRegister(Reg)); const MachineBasicBlock *DefMBB = DefMI->getParent(); @@ -951,8 +949,8 @@ computeInstrHeights(const MachineBasicBlock *MBB) { unsigned Height = TBI.Succ ? 
Cycles.lookup(PHI).Height : 0; DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); if (pushDepHeight(Deps.front(), PHI, Height, - Heights, MTM.ItinData, MTM.TII)) - addLiveIns(Deps.front().DefMI, Stack); + Heights, MTM.SchedModel, MTM.TII)) + addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack); } } } @@ -980,12 +978,12 @@ computeInstrHeights(const MachineBasicBlock *MBB) { // There may also be regunit dependencies to include in the height. if (HasPhysRegs) Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, - MTM.ItinData, MTM.TII, MTM.TRI); + MTM.SchedModel, MTM.TII, MTM.TRI); // Update the required height of any virtual registers read by MI. for (unsigned i = 0, e = Deps.size(); i != e; ++i) - if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII)) - addLiveIns(Deps[i].DefMI, Stack); + if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII)) + addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack); InstrCycles &MICycles = Cycles[MI]; MICycles.Height = Cycle; @@ -1054,10 +1052,8 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth; // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) - DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData, - Dep.DefMI, Dep.DefOp, - PHI, Dep.UseOp, - /* FindMin = */ false); + DepCycle += TE.MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false); return DepCycle; } @@ -1068,9 +1064,8 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { unsigned Instrs = TBI.InstrDepth; if (Bottom) Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; - if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) - if (Model->IssueWidth != 0) - return Instrs / Model->IssueWidth; + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; // Assume issue width 1 without a schedule model. 
return Instrs; } @@ -1080,9 +1075,8 @@ getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; - if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) - if (Model->IssueWidth != 0) - return Instrs / Model->IssueWidth; + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; // Assume issue width 1 without a schedule model. return Instrs; } diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h index c5b86f3..460730b 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h @@ -50,6 +50,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/TargetSchedule.h" namespace llvm { @@ -67,9 +68,9 @@ class MachineTraceMetrics : public MachineFunctionPass { const MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - const InstrItineraryData *ItinData; const MachineRegisterInfo *MRI; const MachineLoopInfo *Loops; + TargetSchedModel SchedModel; public: class Ensemble; @@ -164,6 +165,14 @@ public: /// Invalidate height resources when a block below this one has changed. void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; } + /// Determine if this block belongs to the same trace as TBI and comes + /// before it in the trace. + /// Also returns true when TBI == this. + bool isEarlierInSameTrace(const TraceBlockInfo &TBI) const { + return hasValidDepth() && TBI.hasValidDepth() && + Head == TBI.Head && InstrDepth <= TBI.InstrDepth; + } + // Data-dependency-related information. Per-instruction depth and height // are computed from data dependencies in the current trace, using // itinerary data. 
@@ -270,7 +279,7 @@ public: unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); void computeInstrDepths(const MachineBasicBlock*); void computeInstrHeights(const MachineBasicBlock*); - void addLiveIns(const MachineInstr *DefMI, + void addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef<const MachineBasicBlock*> Trace); protected: diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index f745b41..69a3ae8 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -23,8 +23,9 @@ // the verifier errors. //===----------------------------------------------------------------------===// +#include "llvm/BasicBlock.h" +#include "llvm/InlineAsm.h" #include "llvm/Instructions.h" -#include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -73,11 +74,12 @@ namespace { typedef SmallVector<const uint32_t*, 4> RegMaskVector; typedef DenseSet<unsigned> RegSet; typedef DenseMap<unsigned, const MachineInstr*> RegMap; + typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet; const MachineInstr *FirstTerminator; + BlockSet FunctionBlocks; BitVector regsReserved; - BitVector regsAllocatable; RegSet regsLive; RegVector regsDefined, regsDead, regsKilled; RegMaskVector regMasks; @@ -117,6 +119,9 @@ namespace { // block. This set is disjoint from regsLiveOut. RegSet vregsRequired; + // Set versions of block's predecessor and successor lists. + BlockSet Preds, Succs; + BBInfo() : reachable(false) {} // Add register to vregsPassed if it belongs there. 
Return true if @@ -180,7 +185,7 @@ namespace { } bool isAllocatable(unsigned Reg) { - return Reg < regsAllocatable.size() && regsAllocatable.test(Reg); + return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg); } // Analysis information if available @@ -208,6 +213,8 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI); + void verifyInlineAsm(const MachineInstr *MI); + void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); @@ -352,7 +359,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { MF->print(*OS, Indexes); } *OS << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getFunction()->getName() << "\n"; + << "- function: " << MF->getName() << "\n"; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { @@ -360,7 +367,7 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { report(msg, MBB->getParent()); *OS << "- basic block: BB#" << MBB->getNumber() << ' ' << MBB->getName() - << " (" << (void*)MBB << ')'; + << " (" << (const void*)MBB << ')'; if (Indexes) *OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -419,7 +426,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { void MachineVerifier::visitMachineFunctionBefore() { lastIndex = SlotIndex(); - regsReserved = TRI->getReservedRegs(*MF); + regsReserved = MRI->getReservedRegs(); // A sub-register of a reserved register is also reserved for (int Reg = regsReserved.find_first(); Reg>=0; @@ -431,9 +438,23 @@ void MachineVerifier::visitMachineFunctionBefore() { } } - regsAllocatable = TRI->getAllocatableSet(*MF); - markReachable(&MF->front()); + + // Build a set of the basic blocks in the function. 
+ FunctionBlocks.clear(); + for (MachineFunction::const_iterator + I = MF->begin(), E = MF->end(); I != E; ++I) { + FunctionBlocks.insert(I); + BBInfo &MInfo = MBBInfoMap[I]; + + MInfo.Preds.insert(I->pred_begin(), I->pred_end()); + if (MInfo.Preds.size() != I->pred_size()) + report("MBB has duplicate entries in its predecessor list.", I); + + MInfo.Succs.insert(I->succ_begin(), I->succ_end()); + if (MInfo.Succs.size() != I->succ_size()) + report("MBB has duplicate entries in its successor list.", I); + } } // Does iterator point to a and b as the first two elements? @@ -470,6 +491,25 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { E = MBB->succ_end(); I != E; ++I) { if ((*I)->isLandingPad()) LandingPadSuccs.insert(*I); + if (!FunctionBlocks.count(*I)) + report("MBB has successor that isn't part of the function.", MBB); + if (!MBBInfoMap[*I].Preds.count(MBB)) { + report("Inconsistent CFG", MBB); + *OS << "MBB is not in the predecessor list of the successor BB#" + << (*I)->getNumber() << ".\n"; + } + } + + // Check the predecessor list. + for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), + E = MBB->pred_end(); I != E; ++I) { + if (!FunctionBlocks.count(*I)) + report("MBB has predecessor that isn't part of the function.", MBB); + if (!MBBInfoMap[*I].Succs.count(MBB)) { + report("Inconsistent CFG", MBB); + *OS << "MBB is not in the successor list of the predecessor BB#" + << (*I)->getNumber() << ".\n"; + } } const MCAsmInfo *AsmInfo = TM->getMCAsmInfo(); @@ -540,7 +580,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { ++MBBI; if (MBBI == MF->end()) { report("MBB conditionally falls through out of function!", MBB); - } if (MBB->succ_size() != 2) { + } if (MBB->succ_size() == 1) { + // A conditional branch with only one successor is weird, but allowed. 
+ if (&*MBBI != TBB) + report("MBB exits via conditional branch/fall-through but only has " + "one CFG successor!", MBB); + else if (TBB != *MBB->succ_begin()) + report("MBB exits via conditional branch/fall-through but the CFG " + "successor don't match the actual successor!", MBB); + } else if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/fall-through but doesn't have " "exactly two CFG successors!", MBB); } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) { @@ -560,7 +608,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } else if (TBB && FBB) { // Block conditionally branches somewhere, otherwise branches // somewhere else. - if (MBB->succ_size() != 2) { + if (MBB->succ_size() == 1) { + // A conditional branch with only one successor is weird, but allowed. + if (FBB != TBB) + report("MBB exits via conditional branch/branch through but only has " + "one CFG successor!", MBB); + else if (TBB != *MBB->succ_begin()) + report("MBB exits via conditional branch/branch through but the CFG " + "successor don't match the actual successor!", MBB); + } else if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/branch but doesn't have " "exactly two CFG successors!", MBB); } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) { @@ -639,6 +695,50 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { } } +// The operands on an INLINEASM instruction must follow a template. +// Verify that the flag operands make sense. +void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { + // The first two operands on INLINEASM are the asm string and global flags. 
+ if (MI->getNumOperands() < 2) { + report("Too few operands on inline asm", MI); + return; + } + if (!MI->getOperand(0).isSymbol()) + report("Asm string must be an external symbol", MI); + if (!MI->getOperand(1).isImm()) + report("Asm flags must be an immediate", MI); + // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2, + // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16. + if (!isUInt<5>(MI->getOperand(1).getImm())) + report("Unknown asm flags", &MI->getOperand(1), 1); + + assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed"); + + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + unsigned NumOps; + for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) { + const MachineOperand &MO = MI->getOperand(OpNo); + // There may be implicit ops after the fixed operands. + if (!MO.isImm()) + break; + NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm()); + } + + if (OpNo > MI->getNumOperands()) + report("Missing operands in last group", MI); + + // An optional MDNode follows the groups. + if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata()) + ++OpNo; + + // All trailing operands must be implicit registers. + for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (!MO.isReg() || !MO.isImplicit()) + report("Expected implicit register after groups", &MO, OpNo); + } +} + void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); if (MI->getNumOperands() < MCID.getNumOperands()) { @@ -647,6 +747,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { << MI->getNumExplicitOperands() << " given.\n"; } + // Check the tied operands. + if (MI->isInlineAsm()) + verifyInlineAsm(MI); + // Check the MachineMemOperands for basic consistency. 
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I) { @@ -702,6 +806,17 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } + + int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO); + if (TiedTo != -1) { + if (!MO->isReg()) + report("Tied use must be a register", MO, MONum); + else if (!MO->isTied()) + report("Operand should be tied", MO, MONum); + else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) + report("Tied def doesn't match MCInstrDesc", MO, MONum); + } else if (MO->isReg() && MO->isTied()) + report("Explicit operand should not be tied", MO, MONum); } else { // ARM adds %reg0 operands to indicate predicates. We'll allow that. if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg()) @@ -716,6 +831,28 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MRI->tracksLiveness() && !MI->isDebugValue()) checkLiveness(MO, MONum); + // Verify the consistency of tied operands. + if (MO->isTied()) { + unsigned OtherIdx = MI->findTiedOperandIdx(MONum); + const MachineOperand &OtherMO = MI->getOperand(OtherIdx); + if (!OtherMO.isReg()) + report("Must be tied to a register", MO, MONum); + if (!OtherMO.isTied()) + report("Missing tie flags on tied operand", MO, MONum); + if (MI->findTiedOperandIdx(OtherIdx) != MONum) + report("Inconsistent tie links", MO, MONum); + if (MONum < MCID.getNumDefs()) { + if (OtherIdx < MCID.getNumOperands()) { + if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO)) + report("Explicit def tied to explicit use without tie constraint", + MO, MONum); + } else { + if (!OtherMO.isImplicit()) + report("Explicit def should be tied to implicit use", MO, MONum); + } + } + } + // Verify two-address constraints after leaving SSA form. 
unsigned DefIdx; if (!MRI->isSSA() && MO->isUse() && diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp index cfa3eec..4ea21d4 100644 --- a/contrib/llvm/lib/CodeGen/Passes.cpp +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -49,8 +49,8 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, cl::desc("Disable Stack Slot Coloring")); static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden, cl::desc("Disable Machine Dead Code Elimination")); -static cl::opt<bool> EnableEarlyIfConversion("enable-early-ifcvt", cl::Hidden, - cl::desc("Enable Early If-conversion")); +static cl::opt<bool> DisableEarlyIfConversion("disable-early-ifcvt", cl::Hidden, + cl::desc("Disable Early If-conversion")); static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, @@ -161,7 +161,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { return applyDisable(TargetID, DisableMachineDCE); if (StandardID == &EarlyIfConverterID) - return applyDisable(TargetID, !EnableEarlyIfConversion); + return applyDisable(TargetID, DisableEarlyIfConversion); if (StandardID == &MachineLICMID) return applyDisable(TargetID, DisableMachineLICM); @@ -447,8 +447,8 @@ void TargetPassConfig::addMachinePasses() { const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue()); const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs")); assert (TPI && IPI && "Pass ID not registered!"); - const char *TID = (char *)(TPI->getTypeInfo()); - const char *IID = (char *)(IPI->getTypeInfo()); + const char *TID = (const char *)(TPI->getTypeInfo()); + const char *IID = (const char *)(IPI->getTypeInfo()); insertPass(TID, IID); } @@ -456,7 +456,8 @@ void TargetPassConfig::addMachinePasses() { printAndVerify("After Instruction Selection"); // Expand pseudo-instructions emitted by ISel. 
- addPass(&ExpandISelPseudosID); + if (addPass(&ExpandISelPseudosID)) + printAndVerify("After ExpandISelPseudos"); // Add passes that optimize machine instructions in SSA form. if (getOptLevel() != CodeGenOpt::None) { @@ -528,6 +529,10 @@ void TargetPassConfig::addMachineSSAOptimization() { // instructions dead. addPass(&OptimizePHIsID); + // This pass merges large allocas. StackSlotColoring is a different pass + // which merges spill slots. + addPass(&StackColoringID); + // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. addPass(&LocalStackSlotAllocationID); diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 9099862..a795ac8 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -527,6 +527,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SeenMoveImm = true; } else { Changed |= optimizeExtInstr(MI, MBB, LocalMIs); + // optimizeExtInstr might have created new instructions after MI + // and before the already incremented MII. Adjust MII so that the + // next iteration sees the new instructions. + MII = MI; + ++MII; if (SeenMoveImm) Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 7449ff5..d57bc73 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -240,6 +240,7 @@ void SchedulePostRATDList::exitRegion() { ScheduleDAGInstrs::exitRegion(); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// dumpSchedule - dump the scheduled Sequence. 
void SchedulePostRATDList::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { @@ -249,6 +250,7 @@ void SchedulePostRATDList::dumpSchedule() const { dbgs() << "**** NOOP ****\n"; } } +#endif bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getTarget().getInstrInfo(); @@ -298,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { static int bbcnt = 0; if (bbcnt++ % DebugDiv != DebugMod) continue; - dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName() + dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":BB#" << MBB->getNumber() << " ***\n"; } #endif @@ -488,7 +490,6 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); BitVector killedRegs(TRI->getNumRegs()); - BitVector ReservedRegs = TRI->getReservedRegs(MF); StartBlockForKills(MBB); @@ -529,7 +530,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); - if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + if ((Reg == 0) || MRI.isReserved(Reg)) continue; bool kill = false; if (!killedRegs.test(Reg)) { @@ -564,7 +565,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); - if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + if ((Reg == 0) || MRI.isReserved(Reg)) continue; LiveRegs.set(Reg); diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index 34d075c..e4e18c3 100644 --- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -137,8 +137,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction 
&MF) { DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" - << "********** Function: " - << ((Value*)MF.getFunction())->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); bool Changed = false; diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index c791ffb..77554d6 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -96,7 +96,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { placeCSRSpillsAndRestores(Fn); // Add the code to save and restore the callee saved registers - if (!F->hasFnAttr(Attribute::Naked)) + if (!F->getFnAttributes().hasAttribute(Attributes::Naked)) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function @@ -111,7 +111,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. - if (!F->hasFnAttr(Attribute::Naked)) + if (!F->getFnAttributes().hasAttribute(Attributes::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references @@ -221,13 +221,13 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // In Naked functions we aren't going to save any registers. - if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) + if (Fn.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) return; std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { + if (Fn.getRegInfo().isPhysRegUsed(Reg)) { // If the reg is modified, save it! 
CSI.push_back(CalleeSavedInfo(Reg)); } diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 3a03807..8a49609 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -20,7 +20,6 @@ #include "VirtRegMap.h" #include "LiveRegMatrix.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -273,7 +272,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, bool RABasic::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << mf.getName() << '\n'); MF = &mf; RegAllocBase::init(getAnalysis<VirtRegMap>(), diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index 6b3a48e..8892216 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -113,9 +113,11 @@ namespace { // PhysRegState - One of the RegState enums, or a virtreg. std::vector<unsigned> PhysRegState; - // UsedInInstr - BitVector of physregs that are used in the current - // instruction, and so cannot be allocated. - BitVector UsedInInstr; + typedef SparseSet<unsigned> UsedInInstrSet; + + // UsedInInstr - Set of physregs that are used in the current instruction, + // and so cannot be allocated. + UsedInInstrSet UsedInInstr; // SkippedInstrs - Descriptors of instructions whose clobber list was // ignored because all registers were spilled. 
It is still necessary to @@ -173,7 +175,7 @@ namespace { unsigned VirtReg, unsigned Hint); LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); - void spillAll(MachineInstr *MI); + void spillAll(MachineBasicBlock::iterator MI); bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); void addRetOperands(MachineBasicBlock *MBB); }; @@ -312,7 +314,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, } /// spillAll - Spill all dirty virtregs without killing them. -void RAFast::spillAll(MachineInstr *MI) { +void RAFast::spillAll(MachineBasicBlock::iterator MI) { if (LiveVirtRegs.empty()) return; isBulkSpilling = true; // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order @@ -340,7 +342,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { PhysRegState[PhysReg] = regFree; // Fall through case regFree: - UsedInInstr.set(PhysReg); + UsedInInstr.insert(PhysReg); MO.setIsKill(); return; default: @@ -360,13 +362,13 @@ void RAFast::usePhysReg(MachineOperand &MO) { "Instruction is not using a subregister of a reserved register"); // Leave the superregister in the working set. PhysRegState[Alias] = regFree; - UsedInInstr.set(Alias); + UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. - UsedInInstr.set(Alias); + UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; } @@ -380,7 +382,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { // All aliases are disabled, bring register into working set. PhysRegState[PhysReg] = regFree; - UsedInInstr.set(PhysReg); + UsedInInstr.insert(PhysReg); MO.setIsKill(); } @@ -389,7 +391,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { /// reserved instead of allocated. 
void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState) { - UsedInInstr.set(PhysReg); + UsedInInstr.insert(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; @@ -429,7 +431,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RAFast::calcSpillCost(unsigned PhysReg) const { - if (UsedInInstr.test(PhysReg)) { + if (UsedInInstr.count(PhysReg)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n"); return spillImpossible; } @@ -454,7 +456,7 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { unsigned Cost = 0; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { unsigned Alias = *AI; - if (UsedInInstr.test(Alias)) + if (UsedInInstr.count(Alias)) return spillImpossible; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: @@ -509,7 +511,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, // Ignore invalid hints. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint))) + !RC->contains(Hint) || !MRI->isAllocatable(Hint))) Hint = 0; // Take hint when possible. @@ -530,7 +532,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, // First try to find a completely free register. 
for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) { + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) { assignVirtToPhysReg(*LRI, PhysReg); return LRI; } @@ -596,7 +598,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, LRI->LastUse = MI; LRI->LastOpNum = OpNum; LRI->Dirty = true; - UsedInInstr.set(LRI->PhysReg); + UsedInInstr.insert(LRI->PhysReg); return LRI; } @@ -646,7 +648,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, assert(LRI->PhysReg && "Register not assigned"); LRI->LastUse = MI; LRI->LastOpNum = OpNum; - UsedInInstr.set(LRI->PhysReg); + UsedInInstr.insert(LRI->PhysReg); return LRI; } @@ -708,7 +710,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - UsedInInstr.set(*AI); + UsedInInstr.insert(*AI); if (ThroughRegs.count(PhysRegState[*AI])) definePhysReg(MI, *AI, regFree); } @@ -756,7 +758,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, } // Restore UsedInInstr to a state usable for allocating normal virtual uses. - UsedInInstr.reset(); + UsedInInstr.clear(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; @@ -764,12 +766,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) << " as used in instr\n"); - UsedInInstr.set(Reg); + UsedInInstr.insert(Reg); } // Also mark PartialDefs as used to avoid reallocation. 
for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) - UsedInInstr.set(PartialDefs[i]); + UsedInInstr.insert(PartialDefs[i]); } /// addRetOperand - ensure that a return instruction has an operand for each @@ -838,7 +840,7 @@ void RAFast::AllocateBasicBlock() { // Add live-in registers as live. for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) - if (RegClassInfo.isAllocatable(*I)) + if (MRI->isAllocatable(*I)) definePhysReg(MII, *I, regReserved); SmallVector<unsigned, 8> VirtDead; @@ -942,7 +944,7 @@ void RAFast::AllocateBasicBlock() { } // Track registers used by instruction. - UsedInInstr.reset(); + UsedInInstr.clear(); // First scan. // Mark physreg uses and early clobbers as used. @@ -954,6 +956,11 @@ void RAFast::AllocateBasicBlock() { bool hasPhysDefs = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); + // Make sure MRI knows about registers clobbered by regmasks. + if (MO.isRegMask()) { + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + continue; + } if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; @@ -970,7 +977,7 @@ void RAFast::AllocateBasicBlock() { } continue; } - if (!RegClassInfo.isAllocatable(Reg)) continue; + if (!MRI->isAllocatable(Reg)) continue; if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { @@ -1016,11 +1023,13 @@ void RAFast::AllocateBasicBlock() { } } - MRI->addPhysRegsUsed(UsedInInstr); + for (UsedInInstrSet::iterator + I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) + MRI->setPhysRegUsed(*I); // Track registers defined by instruction - early clobbers and tied uses at // this point. - UsedInInstr.reset(); + UsedInInstr.clear(); if (hasEarlyClobbers) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -1030,7 +1039,7 @@ void RAFast::AllocateBasicBlock() { // Look for physreg defs and tied uses. 
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UsedInInstr.set(*AI); + UsedInInstr.insert(*AI); } } @@ -1058,7 +1067,7 @@ void RAFast::AllocateBasicBlock() { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (!RegClassInfo.isAllocatable(Reg)) continue; + if (!MRI->isAllocatable(Reg)) continue; definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); continue; @@ -1080,7 +1089,9 @@ void RAFast::AllocateBasicBlock() { killVirtReg(VirtDead[i]); VirtDead.clear(); - MRI->addPhysRegsUsed(UsedInInstr); + for (UsedInInstrSet::iterator + I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) + MRI->setPhysRegUsed(*I); if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); @@ -1110,8 +1121,7 @@ void RAFast::AllocateBasicBlock() { /// bool RAFast::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" - << "********** Function: " - << ((Value*)Fn.getFunction())->getName() << '\n'); + << "********** Function: " << Fn.getName() << '\n'); MF = &Fn; MRI = &MF->getRegInfo(); TM = &Fn.getTarget(); @@ -1119,7 +1129,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { TII = TM->getInstrInfo(); MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); - UsedInInstr.resize(TRI->getNumRegs()); + UsedInInstr.clear(); + UsedInInstr.setUniverse(TRI->getNumRegs()); assert(!MRI->isSSA() && "regalloc requires leaving SSA"); diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index 6ac5428..06f69c1e 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -24,7 +24,6 @@ #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" #include 
"llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" @@ -331,9 +330,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); AU.addPreserved<LiveDebugVariables>(); - AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); + AU.addRequired<CalculateSpillWeights>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -509,7 +508,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// /// @param VirtReg Live range that is about to be assigned. /// @param PhysReg Desired register for assignment. -/// @prarm IsHint True when PhysReg is VirtReg's preferred register. +/// @param IsHint True when PhysReg is VirtReg's preferred register. /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. 
@@ -1746,8 +1745,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" - << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << "********** Function: " << mf.getName() << '\n'); MF = &mf; if (VerifyEnabled) diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index d0db26b..02ebce7 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -118,7 +118,6 @@ private: typedef std::vector<AllowedSet> AllowedSetMap; typedef std::pair<unsigned, unsigned> RegPair; typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; - typedef std::vector<PBQP::Graph::NodeItr> NodeVector; typedef std::set<unsigned> RegSet; @@ -192,7 +191,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, const MachineLoopInfo *loopInfo, const RegSet &vregs) { - typedef std::vector<const LiveInterval*> LIVector; LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); @@ -209,8 +207,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, mri->setPhysRegUsed(Reg); } - BitVector reservedRegs = tri->getReservedRegs(*mf); - // Iterate over vregs. for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end(); vregItr != vregEnd; ++vregItr) { @@ -219,7 +215,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, LiveInterval *vregLI = &LIS->getInterval(vreg); // Record any overlaps with regmask operands. - BitVector regMaskOverlaps(tri->getNumRegs()); + BitVector regMaskOverlaps; LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps); // Compute an initial allowed set for the current vreg. 
@@ -228,7 +224,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf); for (unsigned i = 0; i != rawOrder.size(); ++i) { unsigned preg = rawOrder[i]; - if (reservedRegs.test(preg)) + if (mri->isReserved(preg)) continue; // vregLI crosses a regmask operand that clobbers preg. @@ -358,7 +354,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( loopInfo->getLoopDepth(mbb)); if (cp.isPhys()) { - if (!lis->isAllocatable(dst)) { + if (!mf->getRegInfo().isAllocatable(dst)) { continue; } @@ -433,6 +429,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<SlotIndexes>(); au.addPreserved<SlotIndexes>(); au.addRequired<LiveIntervals>(); + au.addPreserved<LiveIntervals>(); //au.addRequiredID(SplitCriticalEdgesID); if (customPassID) au.addRequiredID(*customPassID); @@ -444,6 +441,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<MachineLoopInfo>(); au.addPreserved<MachineLoopInfo>(); au.addRequired<VirtRegMap>(); + au.addPreserved<VirtRegMap>(); MachineFunctionPass::getAnalysisUsage(au); } @@ -556,7 +554,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { mri->freezeReservedRegs(MF); - DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n"); // Allocator main loop: // @@ -570,11 +568,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(); +#ifndef NDEBUG const Function* func = mf->getFunction(); std::string fqn = func->getParent()->getModuleIdentifier() + "." + func->getName().str(); - (void)fqn; +#endif // If there are non-empty intervals allocate them using pbqp. 
if (!vregsToAlloc.empty()) { diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp index 652bc30..805d235 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -15,8 +15,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -57,10 +58,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { CalleeSaved = CSR; // Different reserved registers? - BitVector RR = TRI->getReservedRegs(*MF); - if (RR != Reserved) + const BitVector &RR = MF->getRegInfo().getReservedRegs(); + if (Reserved.size() != RR.size() || RR != Reserved) { Update = true; - Reserved = RR; + Reserved = RR; + } // Invalidate cached information from previous function. if (Update) diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index 9906334..2538f10 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -55,6 +55,8 @@ STATISTIC(numCommutes , "Number of instruction commuting performed"); STATISTIC(numExtends , "Number of copies extended"); STATISTIC(NumReMats , "Number of instructions re-materialized"); STATISTIC(NumInflated , "Number of register classes inflated"); +STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); +STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); static cl::opt<bool> EnableJoining("join-liveintervals", @@ -123,6 +125,9 @@ namespace { /// can use this information below to update aliases. 
bool joinIntervals(CoalescerPair &CP); + /// Attempt joining two virtual registers. Return true on success. + bool joinVirtRegs(CoalescerPair &CP); + /// Attempt joining with a reserved physreg. bool joinReservedPhysReg(CoalescerPair &CP); @@ -193,12 +198,6 @@ INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", char RegisterCoalescer::ID = 0; -static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) { - if (!a) return b; - if (!b) return a; - return tri.composeSubRegIndices(a, b); -} - static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, unsigned &Src, unsigned &Dst, unsigned &SrcSub, unsigned &DstSub) { @@ -209,8 +208,8 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, SrcSub = MI->getOperand(1).getSubReg(); } else if (MI->isSubregToReg()) { Dst = MI->getOperand(0).getReg(); - DstSub = compose(tri, MI->getOperand(0).getSubReg(), - MI->getOperand(3).getImm()); + DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(), + MI->getOperand(3).getImm()); Src = MI->getOperand(2).getReg(); SrcSub = MI->getOperand(2).getSubReg(); } else @@ -349,7 +348,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (DstReg != Dst) return false; // Registers match, do the subregisters line up? - return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub); + return TRI.composeSubRegIndices(SrcIdx, SrcSub) == + TRI.composeSubRegIndices(DstIdx, DstSub); } } @@ -425,7 +425,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); - if (!CP.isCoalescable(ACopyMI)) + // Don't allow any partial copies, even if isCoalescable() allows them. 
+ if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) return false; // Get the LiveRange in IntB that this value number starts with. @@ -583,7 +584,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); - if (NewReg != IntB.reg || !NewDstMO.isKill()) + if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill()) return false; // Make sure there are no other definitions of IntB that would reach the @@ -849,8 +850,17 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, // Update LiveDebugVariables. LDV->renameRegister(SrcReg, DstReg, SubIdx); + SmallPtrSet<MachineInstr*, 8> Visited; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); MachineInstr *UseMI = I.skipInstruction();) { + // Each instruction can only be rewritten once because sub-register + // composition is not always idempotent. When SrcReg != DstReg, rewriting + // the UseMI operands removes them from the SrcReg use-def chain, but when + // SrcReg is DstReg we could encounter UseMI twice if it has multiple + // operands mentioning the virtual register. + if (SrcReg == DstReg && !Visited.insert(UseMI)) + continue; + SmallVector<unsigned,8> Ops; bool Reads, Writes; tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); @@ -890,7 +900,7 @@ bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) { /// Always join simple intervals that are defined by a single copy from a /// reserved register. This doesn't increase register pressure, so it is /// always beneficial. - if (!RegClassInfo.isReserved(CP.getDstReg())) { + if (!MRI->isReserved(CP.getDstReg())) { DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); return false; } @@ -1065,7 +1075,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { /// Attempt joining with a reserved physreg. 
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { assert(CP.isPhys() && "Must be a physreg copy"); - assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register"); + assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register"); LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS << '\n'); @@ -1102,347 +1112,797 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { return true; } -/// ComputeUltimateVN - Assuming we are going to join two live intervals, -/// compute what the resultant value numbers for each value in the input two -/// ranges will be. This is complicated by copies between the two which can -/// and will commonly cause multiple value numbers to be merged into one. -/// -/// VN is the value number that we're trying to resolve. InstDefiningValue -/// keeps track of the new InstDefiningValue assignment for the result -/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of -/// whether a value in this or other is a copy from the opposite set. -/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have -/// already been assigned. -/// -/// ThisFromOther[x] - If x is defined as a copy from the other interval, this -/// contains the value number the copy is from. -/// -static unsigned ComputeUltimateVN(VNInfo *VNI, - SmallVector<VNInfo*, 16> &NewVNInfo, - DenseMap<VNInfo*, VNInfo*> &ThisFromOther, - DenseMap<VNInfo*, VNInfo*> &OtherFromThis, - SmallVector<int, 16> &ThisValNoAssignments, - SmallVector<int, 16> &OtherValNoAssignments) { - unsigned VN = VNI->id; - - // If the VN has already been computed, just return it. - if (ThisValNoAssignments[VN] >= 0) - return ThisValNoAssignments[VN]; - assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); - - // If this val is not a copy from the other val, then it must be a new value - // number in the destination. 
- DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI); - if (I == ThisFromOther.end()) { - NewVNInfo.push_back(VNI); - return ThisValNoAssignments[VN] = NewVNInfo.size()-1; - } - VNInfo *OtherValNo = I->second; - - // Otherwise, this *is* a copy from the RHS. If the other side has already - // been computed, return it. - if (OtherValNoAssignments[OtherValNo->id] >= 0) - return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; - - // Mark this value number as currently being computed, then ask what the - // ultimate value # of the other value is. - ThisValNoAssignments[VN] = -2; - unsigned UltimateVN = - ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther, - OtherValNoAssignments, ThisValNoAssignments); - return ThisValNoAssignments[VN] = UltimateVN; -} +//===----------------------------------------------------------------------===// +// Interference checking and interval joining +//===----------------------------------------------------------------------===// +// +// In the easiest case, the two live ranges being joined are disjoint, and +// there is no interference to consider. It is quite common, though, to have +// overlapping live ranges, and we need to check if the interference can be +// resolved. +// +// The live range of a single SSA value forms a sub-tree of the dominator tree. +// This means that two SSA values overlap if and only if the def of one value +// is contained in the live range of the other value. As a special case, the +// overlapping values can be defined at the same index. +// +// The interference from an overlapping def can be resolved in these cases: +// +// 1. Coalescable copies. The value is defined by a copy that would become an +// identity copy after joining SrcReg and DstReg. The copy instruction will +// be removed, and the value will be merged with the source value. +// +// There can be several copies back and forth, causing many values to be +// merged into one. 
We compute a list of ultimate values in the joined live +// range as well as a mappings from the old value numbers. +// +// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI +// predecessors have a live out value. It doesn't cause real interference, +// and can be merged into the value it overlaps. Like a coalescable copy, it +// can be erased after joining. +// +// 3. Copy of external value. The overlapping def may be a copy of a value that +// is already in the other register. This is like a coalescable copy, but +// the live range of the source register must be trimmed after erasing the +// copy instruction: +// +// %src = COPY %ext +// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext. +// +// 4. Clobbering undefined lanes. Vector registers are sometimes built by +// defining one lane at a time: +// +// %dst:ssub0<def,read-undef> = FOO +// %src = BAR +// %dst:ssub1<def> = COPY %src +// +// The live range of %src overlaps the %dst value defined by FOO, but +// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane +// which was undef anyway. +// +// The value mapping is more complicated in this case. The final live range +// will have different value numbers for both FOO and BAR, but there is no +// simple mapping from old to new values. It may even be necessary to add +// new PHI values. +// +// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that +// is live, but never read. This can happen because we don't compute +// individual live ranges per lane. +// +// %dst<def> = FOO +// %src = BAR +// %dst:ssub1<def> = COPY %src +// +// This kind of interference is only resolved locally. If the clobbered +// lane value escapes the block, the join is aborted. +namespace { +/// Track information about values in a single virtual register about to be +/// joined. Objects of this class are always created in pairs - one for each +/// side of the CoalescerPair. 
+class JoinVals { + LiveInterval &LI; + + // Location of this register in the final joined register. + // Either CP.DstIdx or CP.SrcIdx. + unsigned SubIdx; + + // Values that will be present in the final live range. + SmallVectorImpl<VNInfo*> &NewVNInfo; + + const CoalescerPair &CP; + LiveIntervals *LIS; + SlotIndexes *Indexes; + const TargetRegisterInfo *TRI; + + // Value number assignments. Maps value numbers in LI to entries in NewVNInfo. + // This is suitable for passing to LiveInterval::join(). + SmallVector<int, 8> Assignments; + + // Conflict resolution for overlapping values. + enum ConflictResolution { + // No overlap, simply keep this value. + CR_Keep, + + // Merge this value into OtherVNI and erase the defining instruction. + // Used for IMPLICIT_DEF, coalescable copies, and copies from external + // values. + CR_Erase, + + // Merge this value into OtherVNI but keep the defining instruction. + // This is for the special case where OtherVNI is defined by the same + // instruction. + CR_Merge, + + // Keep this value, and have it replace OtherVNI where possible. This + // complicates value mapping since OtherVNI maps to two different values + // before and after this def. + // Used when clobbering undefined or dead lanes. + CR_Replace, + + // Unresolved conflict. Visit later when all values have been mapped. + CR_Unresolved, + + // Unresolvable conflict. Abort the join. + CR_Impossible + }; -// Find out if we have something like -// A = X -// B = X -// if so, we can pretend this is actually -// A = X -// B = A -// which allows us to coalesce A and B. -// VNI is the definition of B. LR is the life range of A that includes -// the slot just before B. If we return true, we add "B = X" to DupCopies. -// This implies that A dominates B. 
-static bool RegistersDefinedFromSameValue(LiveIntervals &li, - const TargetRegisterInfo &tri, - CoalescerPair &CP, - VNInfo *VNI, - VNInfo *OtherVNI, - SmallVector<MachineInstr*, 8> &DupCopies) { - // FIXME: This is very conservative. For example, we don't handle - // physical registers. - - MachineInstr *MI = li.getInstructionFromIndex(VNI->def); - - if (!MI || CP.isPartial() || CP.isPhys()) - return false; + // Per-value info for LI. The lane bit masks are all relative to the final + // joined register, so they can be compared directly between SrcReg and + // DstReg. + struct Val { + ConflictResolution Resolution; - unsigned A = CP.getDstReg(); - if (!TargetRegisterInfo::isVirtualRegister(A)) - return false; + // Lanes written by this def, 0 for unanalyzed values. + unsigned WriteLanes; - unsigned B = CP.getSrcReg(); - if (!TargetRegisterInfo::isVirtualRegister(B)) - return false; + // Lanes with defined values in this register. Other lanes are undef and + // safe to clobber. + unsigned ValidLanes; - MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def); - if (!OtherMI) - return false; + // Value in LI being redefined by this def. + VNInfo *RedefVNI; - if (MI->isImplicitDef()) { - DupCopies.push_back(MI); - return true; - } else { - if (!MI->isFullCopy()) - return false; - unsigned Src = MI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Src)) - return false; - if (!OtherMI->isFullCopy()) - return false; - unsigned OtherSrc = OtherMI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(OtherSrc)) - return false; + // Value in the other live range that overlaps this def, if any. + VNInfo *OtherVNI; - if (Src != OtherSrc) - return false; + // Is this value an IMPLICIT_DEF? + bool IsImplicitDef; - // If the copies use two different value numbers of X, we cannot merge - // A and B. - LiveInterval &SrcInt = li.getInterval(Src); - // getVNInfoBefore returns NULL for undef copies. 
In this case, the - // optimization is still safe. - if (SrcInt.getVNInfoBefore(OtherVNI->def) != - SrcInt.getVNInfoBefore(VNI->def)) - return false; + // True when the live range of this value will be pruned because of an + // overlapping CR_Replace value in the other live range. + bool Pruned; - DupCopies.push_back(MI); - return true; - } -} + // True once Pruned above has been computed. + bool PrunedComputed; -/// joinIntervals - Attempt to join these two intervals. On failure, this -/// returns false. -bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { - // Handle physreg joins separately. - if (CP.isPhys()) - return joinReservedPhysReg(CP); + Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), + RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false), + PrunedComputed(false) {} - LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); - DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS - << '\n'); + bool isAnalyzed() const { return WriteLanes != 0; } + }; - // Compute the final value assignment, assuming that the live ranges can be - // coalesced. - SmallVector<int, 16> LHSValNoAssignments; - SmallVector<int, 16> RHSValNoAssignments; - DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS; - DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS; - SmallVector<VNInfo*, 16> NewVNInfo; + // One entry per value number in LI. 
+ SmallVector<Val, 8> Vals; + + unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef); + VNInfo *stripCopies(VNInfo *VNI); + ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other); + void computeAssignment(unsigned ValNo, JoinVals &Other); + bool taintExtent(unsigned, unsigned, JoinVals&, + SmallVectorImpl<std::pair<SlotIndex, unsigned> >&); + bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned); + bool isPrunedValue(unsigned ValNo, JoinVals &Other); + +public: + JoinVals(LiveInterval &li, unsigned subIdx, + SmallVectorImpl<VNInfo*> &newVNInfo, + const CoalescerPair &cp, + LiveIntervals *lis, + const TargetRegisterInfo *tri) + : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis), + Indexes(LIS->getSlotIndexes()), TRI(tri), + Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums()) + {} + + /// Analyze defs in LI and compute a value mapping in NewVNInfo. + /// Returns false if any conflicts were impossible to resolve. + bool mapValues(JoinVals &Other); + + /// Try to resolve conflicts that require all values to be mapped. + /// Returns false if any conflicts were impossible to resolve. + bool resolveConflicts(JoinVals &Other); + + /// Prune the live range of values in Other.LI where they would conflict with + /// CR_Replace values in LI. Collect end points for restoring the live range + /// after joining. + void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints); + + /// Erase any machine instructions that have been coalesced away. + /// Add erased instructions to ErasedInstrs. + /// Add foreign virtual registers to ShrinkRegs if their live range ended at + /// the erased instrs. + void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs, + SmallVectorImpl<unsigned> &ShrinkRegs); + + /// Get the value assignments suitable for passing to LiveInterval::join. 
+ const int *getAssignments() const { return Assignments.data(); } +}; +} // end anonymous namespace + +/// Compute the bitmask of lanes actually written by DefMI. +/// Set Redef if there are any partial register definitions that depend on the +/// previous value of the register. +unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) { + unsigned L = 0; + for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) { + if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef()) + continue; + L |= TRI->getSubRegIndexLaneMask( + TRI->composeSubRegIndices(SubIdx, MO->getSubReg())); + if (MO->readsReg()) + Redef = true; + } + return L; +} - SmallVector<MachineInstr*, 8> DupCopies; - SmallVector<MachineInstr*, 8> DeadCopies; +/// Find the ultimate value that VNI was copied from. +VNInfo *JoinVals::stripCopies(VNInfo *VNI) { + while (!VNI->isPHIDef()) { + MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def); + assert(MI && "No defining instruction"); + if (!MI->isFullCopy()) + break; + unsigned Reg = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + break; + LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def); + if (!LRQ.valueIn()) + break; + VNI = LRQ.valueIn(); + } + return VNI; +} - LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg()); - DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS - << '\n'); +/// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. +/// Return a conflict resolution when possible, but leave the hard cases as +/// CR_Unresolved. +/// Recursively calls computeAssignment() on this and Other, guaranteeing that +/// both OtherVNI and RedefVNI have been analyzed and mapped before returning. +/// The recursion always goes upwards in the dominator tree, making loops +/// impossible. 
+JoinVals::ConflictResolution +JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + assert(!V.isAnalyzed() && "Value has already been analyzed!"); + VNInfo *VNI = LI.getValNumInfo(ValNo); + if (VNI->isUnused()) { + V.WriteLanes = ~0u; + return CR_Keep; + } - // Loop over the value numbers of the LHS, seeing if any are defined from - // the RHS. - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->isPHIDef()) - continue; - MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); - assert(MI && "Missing def"); - if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? - continue; + // Get the instruction defining this value, compute the lanes written. + const MachineInstr *DefMI = 0; + if (VNI->isPHIDef()) { + // Conservatively assume that all lanes in a PHI are valid. + V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx); + } else { + DefMI = Indexes->getInstructionFromIndex(VNI->def); + bool Redef = false; + V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); + + // If this is a read-modify-write instruction, there may be more valid + // lanes than the ones written by this instruction. + // This only covers partial redef operands. DefMI may have normal use + // operands reading the register. They don't contribute valid lanes. + // + // This adds ssub1 to the set of valid lanes in %src: + // + // %src:ssub1<def> = FOO + // + // This leaves only ssub1 valid, making any other lanes undef: + // + // %src:ssub1<def,read-undef> = FOO %src:ssub2 + // + // The <read-undef> flag on the def operand means that old lane values are + // not important. 
+ if (Redef) { + V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn(); + assert(V.RedefVNI && "Instruction is reading nonexistent value"); + computeAssignment(V.RedefVNI->id, Other); + V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; + } - // Figure out the value # from the RHS. - VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def); - // The copy could be to an aliased physreg. - if (!OtherVNI) - continue; + // An IMPLICIT_DEF writes undef values. + if (DefMI->isImplicitDef()) { + V.IsImplicitDef = true; + V.ValidLanes &= ~V.WriteLanes; + } + } - // DstReg is known to be a register in the LHS interval. If the src is - // from the RHS interval, we can use its value #. - if (CP.isCoalescable(MI)) - DeadCopies.push_back(MI); - else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI, - DupCopies)) - continue; + // Find the value in Other that overlaps VNI->def, if any. + LiveRangeQuery OtherLRQ(Other.LI, VNI->def); + + // It is possible that both values are defined by the same instruction, or + // the values are PHIs defined in the same block. When that happens, the two + // values should be merged into one, but not into any preceding value. + // The first value defined or visited gets CR_Keep, the other gets CR_Merge. + if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) { + assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ"); + + // One value stays, the other is merged. Keep the earlier one, or the first + // one we see. + if (OtherVNI->def < VNI->def) + Other.computeAssignment(OtherVNI->id, *this); + else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) { + // This is an early-clobber def overlapping a live-in value in the other + // register. Not mergeable. + V.OtherVNI = OtherLRQ.valueIn(); + return CR_Impossible; + } + V.OtherVNI = OtherVNI; + Val &OtherV = Other.Vals[OtherVNI->id]; + // Keep this value, check for conflicts when analyzing OtherVNI. + if (!OtherV.isAnalyzed()) + return CR_Keep; + // Both sides have been analyzed now. 
+ // Allow overlapping PHI values. Any real interference would show up in a + // predecessor, the PHI itself can't introduce any conflicts. + if (VNI->isPHIDef()) + return CR_Merge; + if (V.ValidLanes & OtherV.ValidLanes) + // Overlapping lanes can't be resolved. + return CR_Impossible; + else + return CR_Merge; + } - LHSValsDefinedFromRHS[VNI] = OtherVNI; + // No simultaneous def. Is Other live at the def? + V.OtherVNI = OtherLRQ.valueIn(); + if (!V.OtherVNI) + // No overlap, no conflict. + return CR_Keep; + + assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ"); + + // We have overlapping values, or possibly a kill of Other. + // Recursively compute assignments up the dominator tree. + Other.computeAssignment(V.OtherVNI->id, *this); + const Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // Allow overlapping PHI values. Any real interference would show up in a + // predecessor, the PHI itself can't introduce any conflicts. + if (VNI->isPHIDef()) + return CR_Replace; + + // Check for simple erasable conflicts. + if (DefMI->isImplicitDef()) + return CR_Erase; + + // Include the non-conflict where DefMI is a coalescable copy that kills + // OtherVNI. We still want the copy erased and value numbers merged. + if (CP.isCoalescable(DefMI)) { + // Some of the lanes copied from OtherVNI may be undef, making them undef + // here too. + V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes; + return CR_Erase; } - // Loop over the value numbers of the RHS, seeing if any are defined from - // the LHS. - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->isPHIDef()) - continue; - MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); - assert(MI && "Missing def"); - if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? - continue; + // This may not be a real conflict if DefMI simply kills Other and defines + // VNI. 
+ if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def) + return CR_Keep; + + // Handle the case where VNI and OtherVNI can be proven to be identical: + // + // %other = COPY %ext + // %this = COPY %ext <-- Erase this copy + // + if (DefMI->isFullCopy() && !CP.isPartial() && + stripCopies(VNI) == stripCopies(V.OtherVNI)) + return CR_Erase; + + // If the lanes written by this instruction were all undef in OtherVNI, it is + // still safe to join the live ranges. This can't be done with a simple value + // mapping, though - OtherVNI will map to multiple values: + // + // 1 %dst:ssub0 = FOO <-- OtherVNI + // 2 %src = BAR <-- VNI + // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy. + // 4 BAZ %dst<kill> + // 5 QUUX %src<kill> + // + // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace + // handles this complex value mapping. + if ((V.WriteLanes & OtherV.ValidLanes) == 0) + return CR_Replace; + + // If the other live range is killed by DefMI and the live ranges are still + // overlapping, it must be because we're looking at an early clobber def: + // + // %dst<def,early-clobber> = ASM %src<kill> + // + // In this case, it is illegal to merge the two live ranges since the early + // clobber def would clobber %src before it was read. + if (OtherLRQ.isKill()) { + // This case where the def doesn't overlap the kill is handled above. + assert(VNI->def.isEarlyClobber() && + "Only early clobber defs can overlap a kill"); + return CR_Impossible; + } - // Figure out the value # from the LHS. - VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def); - // The copy could be to an aliased physreg. - if (!OtherVNI) - continue; + // VNI is clobbering live lanes in OtherVNI, but there is still the + // possibility that no instructions actually read the clobbered lanes. + // If we're clobbering all the lanes in OtherVNI, at least one must be read. + // Otherwise Other.LI wouldn't be live here. 
+ if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0) + return CR_Impossible; + + // We need to verify that no instructions are reading the clobbered lanes. To + // save compile time, we'll only check that locally. Don't allow the tainted + // value to escape the basic block. + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB)) + return CR_Impossible; + + // There are still some things that could go wrong besides clobbered lanes + // being read, for example OtherVNI may be only partially redefined in MBB, + // and some clobbered lanes could escape the block. Save this analysis for + // resolveConflicts() when all values have been mapped. We need to know + // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute + // that now - the recursive analyzeValue() calls must go upwards in the + // dominator tree. + return CR_Unresolved; +} - // DstReg is known to be a register in the RHS interval. If the src is - // from the LHS interval, we can use its value #. - if (CP.isCoalescable(MI)) - DeadCopies.push_back(MI); - else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI, - DupCopies)) - continue; +/// Compute the value assignment for ValNo in LI. +/// This may be called recursively by analyzeValue(), but never for a ValNo on +/// the stack. +void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + if (V.isAnalyzed()) { + // Recursion should always move up the dominator tree, so ValNo is not + // supposed to reappear before it has been assigned. + assert(Assignments[ValNo] != -1 && "Bad recursion?"); + return; + } + switch ((V.Resolution = analyzeValue(ValNo, Other))) { + case CR_Erase: + case CR_Merge: + // Merge this ValNo into OtherVNI. 
+ assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); + assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); + Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; + DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@' + << LI.getValNumInfo(ValNo)->def << " into " + << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@' + << V.OtherVNI->def << " --> @" + << NewVNInfo[Assignments[ValNo]]->def << '\n'); + break; + case CR_Replace: + case CR_Unresolved: + // The other value is going to be pruned if this join is successful. + assert(V.OtherVNI && "OtherVNI not assigned, can't prune"); + Other.Vals[V.OtherVNI->id].Pruned = true; + // Fall through. + default: + // This value number needs to go in the final joined live range. + Assignments[ValNo] = NewVNInfo.size(); + NewVNInfo.push_back(LI.getValNumInfo(ValNo)); + break; + } +} - RHSValsDefinedFromLHS[VNI] = OtherVNI; +bool JoinVals::mapValues(JoinVals &Other) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + computeAssignment(i, Other); + if (Vals[i].Resolution == CR_Impossible) { + DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i + << '@' << LI.getValNumInfo(i)->def << '\n'); + return false; + } } + return true; +} - LHSValNoAssignments.resize(LHS.getNumValNums(), -1); - RHSValNoAssignments.resize(RHS.getNumValNums(), -1); - NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); +/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute +/// the extent of the tainted lanes in the block. +/// +/// Multiple values in Other.LI can be affected since partial redefinitions can +/// preserve previously tainted lanes. 
+/// +/// 1 %dst = VLOAD <-- Define all lanes in %dst +/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 +/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 +/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read +/// +/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) +/// entry to TaintedVals. +/// +/// Returns false if the tainted lanes extend beyond the basic block. +bool JoinVals:: +taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, + SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) { + VNInfo *VNI = LI.getValNumInfo(ValNo); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); + + // Scan Other.LI from VNI.def to MBBEnd. + LiveInterval::iterator OtherI = Other.LI.find(VNI->def); + assert(OtherI != Other.LI.end() && "No conflict?"); + do { + // OtherI is pointing to a tainted value. Abort the join if the tainted + // lanes escape the block. + SlotIndex End = OtherI->end; + if (End >= MBBEnd) { + DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':' + << OtherI->valno->id << '@' << OtherI->start << '\n'); + return false; + } + DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':' + << OtherI->valno->id << '@' << OtherI->start + << " to " << End << '\n'); + // A dead def is not a problem. + if (End.isDead()) + break; + TaintExtent.push_back(std::make_pair(End, TaintedLanes)); + + // Check for another def in the MBB. + if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd) + break; + + // Lanes written by the new def are no longer tainted. 
+ const Val &OV = Other.Vals[OtherI->valno->id]; + TaintedLanes &= ~OV.WriteLanes; + if (!OV.RedefVNI) + break; + } while (TaintedLanes); + return true; +} - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) +/// Return true if MI uses any of the given Lanes from Reg. +/// This does not include partial redefinitions of Reg. +bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx, + unsigned Lanes) { + if (MI->isDebugValue()) + return false; + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg) continue; - ComputeUltimateVN(VNI, NewVNInfo, - LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, - LHSValNoAssignments, RHSValNoAssignments); - } - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + if (!MO->readsReg()) continue; - // If this value number isn't a copy from the LHS, it's a new number. 
- if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { - NewVNInfo.push_back(VNI); - RHSValNoAssignments[VN] = NewVNInfo.size()-1; + if (Lanes & TRI->getSubRegIndexLaneMask( + TRI->composeSubRegIndices(SubIdx, MO->getSubReg()))) + return true; + } + return false; +} + +bool JoinVals::resolveConflicts(JoinVals &Other) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + Val &V = Vals[i]; + assert (V.Resolution != CR_Impossible && "Unresolvable conflict"); + if (V.Resolution != CR_Unresolved) continue; - } + DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i + << '@' << LI.getValNumInfo(i)->def << '\n'); + ++NumLaneConflicts; + assert(V.OtherVNI && "Inconsistent conflict resolution."); + VNInfo *VNI = LI.getValNumInfo(i); + const Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the + // join, those lanes will be tainted with a wrong value. Get the extent of + // the tainted lanes. + unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes; + SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent; + if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) + // Tainted lanes would extend beyond the basic block. + return false; - ComputeUltimateVN(VNI, NewVNInfo, - RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, - RHSValNoAssignments, LHSValNoAssignments); - } + assert(!TaintExtent.empty() && "There should be at least one conflict."); - // Armed with the mappings of LHS/RHS values to ultimate values, walk the - // interval lists to see if these intervals are coalescable. - LiveInterval::const_iterator I = LHS.begin(); - LiveInterval::const_iterator IE = LHS.end(); - LiveInterval::const_iterator J = RHS.begin(); - LiveInterval::const_iterator JE = RHS.end(); - - // Collect interval end points that will no longer be kills. 
- SmallVector<MachineInstr*, 8> LHSOldKills; - SmallVector<MachineInstr*, 8> RHSOldKills; - - // Skip ahead until the first place of potential sharing. - if (I != IE && J != JE) { - if (I->start < J->start) { - I = std::upper_bound(I, IE, J->start); - if (I != LHS.begin()) --I; - } else if (J->start < I->start) { - J = std::upper_bound(J, JE, I->start); - if (J != RHS.begin()) --J; + // Now look at the instructions from VNI->def to TaintExtent (inclusive). + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + MachineBasicBlock::iterator MI = MBB->begin(); + if (!VNI->isPHIDef()) { + MI = Indexes->getInstructionFromIndex(VNI->def); + // No need to check the instruction defining VNI for reads. + ++MI; } - } - - while (I != IE && J != JE) { - // Determine if these two live ranges overlap. - // If so, check value # info to determine if they are really different. - if (I->end > J->start && J->end > I->start) { - // If the live range overlap will map to the same value number in the - // result liverange, we can still coalesce them. If not, we can't. - if (LHSValNoAssignments[I->valno->id] != - RHSValNoAssignments[J->valno->id]) + assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) && + "Interference ends on VNI->def. Should have been handled earlier"); + MachineInstr *LastMI = + Indexes->getInstructionFromIndex(TaintExtent.front().first); + assert(LastMI && "Range must end at a proper instruction"); + unsigned TaintNum = 0; + for(;;) { + assert(MI != MBB->end() && "Bad LastMI"); + if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) { + DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); return false; - - // Extended live ranges should no longer be killed. 
- if (!I->end.isBlock() && I->end < J->end) - if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end)) - LHSOldKills.push_back(MI); - if (!J->end.isBlock() && J->end < I->end) - if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end)) - RHSOldKills.push_back(MI); + } + // LastMI is the last instruction to use the current value. + if (&*MI == LastMI) { + if (++TaintNum == TaintExtent.size()) + break; + LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first); + assert(LastMI && "Range must end at a proper instruction"); + TaintedLanes = TaintExtent[TaintNum].second; + } + ++MI; } - if (I->end < J->end) - ++I; - else - ++J; - } - - // Clear kill flags where live ranges are extended. - while (!LHSOldKills.empty()) - LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI); - while (!RHSOldKills.empty()) - RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI); - - if (LHSValNoAssignments.empty()) - LHSValNoAssignments.push_back(-1); - if (RHSValNoAssignments.empty()) - RHSValNoAssignments.push_back(-1); - - // Now erase all the redundant copies. - for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) { - MachineInstr *MI = DeadCopies[i]; - if (!ErasedInstrs.insert(MI)) - continue; - DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI) - << '\t' << *MI); - LIS->RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); + // The tainted lanes are unused. + V.Resolution = CR_Replace; + ++NumLaneResolves; } + return true; +} - SmallVector<unsigned, 8> SourceRegisters; - for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(), - E = DupCopies.end(); I != E; ++I) { - MachineInstr *MI = *I; - if (!ErasedInstrs.insert(MI)) - continue; +// Determine if ValNo is a copy of a value number in LI or Other.LI that will +// be pruned: +// +// %dst = COPY %src +// %src = COPY %dst <-- This value to be pruned. +// %dst = COPY %src <-- This value is a copy of a pruned value. 
+// +bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + if (V.Pruned || V.PrunedComputed) + return V.Pruned; + + if (V.Resolution != CR_Erase && V.Resolution != CR_Merge) + return V.Pruned; + + // Follow copies up the dominator tree and check if any intermediate value + // has been pruned. + V.PrunedComputed = true; + V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this); + return V.Pruned; +} - // If MI is a copy, then we have pretended that the assignment to B in - // A = X - // B = X - // was actually a copy from A. Now that we decided to coalesce A and B, - // transform the code into - // A = X - // In the case of the implicit_def, we just have to remove it. - if (!MI->isImplicitDef()) { - unsigned Src = MI->getOperand(1).getReg(); - SourceRegisters.push_back(Src); +void JoinVals::pruneValues(JoinVals &Other, + SmallVectorImpl<SlotIndex> &EndPoints) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + SlotIndex Def = LI.getValNumInfo(i)->def; + switch (Vals[i].Resolution) { + case CR_Keep: + break; + case CR_Replace: { + // This value takes precedence over the value in Other.LI. + LIS->pruneValue(&Other.LI, Def, &EndPoints); + // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF + // instructions are only inserted to provide a live-out value for PHI + // predecessors, so the instruction should simply go away once its value + // has been replaced. + Val &OtherV = Other.Vals[Vals[i].OtherVNI->id]; + bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep; + if (!Def.isBlock()) { + // Remove <def,read-undef> flags. This def is now a partial redef. + // Also remove <def,dead> flags since the joined live range will + // continue past this instruction. 
+ for (MIOperands MO(Indexes->getInstructionFromIndex(Def)); + MO.isValid(); ++MO) + if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) { + MO->setIsUndef(EraseImpDef); + MO->setIsDead(false); + } + // This value will reach instructions below, but we need to make sure + // the live range also reaches the instruction at Def. + if (!EraseImpDef) + EndPoints.push_back(Def); + } + DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def + << ": " << Other.LI << '\n'); + break; + } + case CR_Erase: + case CR_Merge: + if (isPrunedValue(i, Other)) { + // This value is ultimately a copy of a pruned value in LI or Other.LI. + // We can no longer trust the value mapping computed by + // computeAssignment(), the value that was originally copied could have + // been replaced. + LIS->pruneValue(&LI, Def, &EndPoints); + DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at " + << Def << ": " << LI << '\n'); + } + break; + case CR_Unresolved: + case CR_Impossible: + llvm_unreachable("Unresolved conflicts"); } - LIS->RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); } +} - // If B = X was the last use of X in a liverange, we have to shrink it now - // that B = X is gone. - for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(), - E = SourceRegisters.end(); I != E; ++I) { - LIS->shrinkToUses(&LIS->getInterval(*I)); +void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs, + SmallVectorImpl<unsigned> &ShrinkRegs) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + // Get the def location before markUnused() below invalidates it. + SlotIndex Def = LI.getValNumInfo(i)->def; + switch (Vals[i].Resolution) { + case CR_Keep: + // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any + // longer. The IMPLICIT_DEF instructions are only inserted by + // PHIElimination to guarantee that all PHI predecessors have a value. 
+ if (!Vals[i].IsImplicitDef || !Vals[i].Pruned) + break; + // Remove value number i from LI. Note that this VNInfo is still present + // in NewVNInfo, so it will appear as an unused value number in the final + // joined interval. + LI.getValNumInfo(i)->markUnused(); + LI.removeValNo(LI.getValNumInfo(i)); + DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n'); + // FALL THROUGH. + + case CR_Erase: { + MachineInstr *MI = Indexes->getInstructionFromIndex(Def); + assert(MI && "No instruction to erase"); + if (MI->isCopy()) { + unsigned Reg = MI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg) && + Reg != CP.getSrcReg() && Reg != CP.getDstReg()) + ShrinkRegs.push_back(Reg); + } + ErasedInstrs.insert(MI); + DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + break; + } + default: + break; + } } +} + +bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { + SmallVector<VNInfo*, 16> NewVNInfo; + LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); + JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI); + JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI); + + DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS + << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS + << '\n'); + + // First compute NewVNInfo and the simple value mappings. + // Detect impossible conflicts early. + if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) + return false; + + // Some conflicts can only be resolved after all values have been mapped. + if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) + return false; - // If we get here, we know that we can coalesce the live ranges. Ask the - // intervals to coalesce themselves now. 
- LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, + // All clear, the live ranges can be merged. + + // The merging algorithm in LiveInterval::join() can't handle conflicting + // value mappings, so we need to remove any live ranges that overlap a + // CR_Replace resolution. Collect a set of end points that can be used to + // restore the live range after joining. + SmallVector<SlotIndex, 8> EndPoints; + LHSVals.pruneValues(RHSVals, EndPoints); + RHSVals.pruneValues(LHSVals, EndPoints); + + // Erase COPY and IMPLICIT_DEF instructions. This may cause some external + // registers to require trimming. + SmallVector<unsigned, 8> ShrinkRegs; + LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); + RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); + while (!ShrinkRegs.empty()) + LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); + + // Join RHS into LHS. + LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo, MRI); + + // Kill flags are going to be wrong if the live ranges were overlapping. + // Eventually, we should simply clear all kill flags when computing live + // ranges. They are reinserted after register allocation. + MRI->clearKillFlags(LHS.reg); + MRI->clearKillFlags(RHS.reg); + + if (EndPoints.empty()) + return true; + + // Recompute the parts of the live range we had to remove because of + // CR_Replace conflicts. + DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() + << " points: " << LHS << '\n'); + LIS->extendToIndices(&LHS, EndPoints); return true; } +/// joinIntervals - Attempt to join these two intervals. On failure, this +/// returns false. +bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { + return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); +} + namespace { // DepthMBBCompare - Comparison predicate that sort first based on the loop // depth of the basic block (the unsigned), and then on the MBB number. 
@@ -1564,8 +2024,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { Loops = &getAnalysis<MachineLoopInfo>(); DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" - << "********** Function: " - << ((Value*)MF->getFunction())->getName() << '\n'); + << "********** Function: " << MF->getName() << '\n'); if (VerifyCoalescing) MF->verify(this, "Before register coalescing"); diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h index 8a6df98..47c3df1 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h @@ -63,6 +63,13 @@ namespace llvm { : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0), Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + /// Create a CoalescerPair representing a virtreg-to-physreg copy. + /// No need to call setRegisters(). + CoalescerPair(unsigned VirtReg, unsigned PhysReg, + const TargetRegisterInfo &tri) + : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0), + Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + /// setRegisters - set registers to match the copy instruction MI. Return /// false if MI is not a coalescable copy instruction. 
bool setRegisters(const MachineInstr*); diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index 43448c8..543c426 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -63,7 +63,8 @@ void RegisterPressure::decrease(const TargetRegisterClass *RC, decreaseSetPressure(MaxSetPressure, RC, TRI); } -void RegisterPressure::dump(const TargetRegisterInfo *TRI) { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Live In: "; for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) dbgs() << PrintReg(LiveInRegs[i], TRI) << " "; @@ -78,6 +79,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) { << '\n'; } } +#endif /// Increase the current pressure as impacted by these physical registers and /// bump the high water mark if needed. @@ -320,10 +322,8 @@ struct RegisterOperands { if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end()) DeadDefs.push_back(MO.getReg()); } - else { - if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end()) - Defs.push_back(MO.getReg()); - } + else if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end()) + Defs.push_back(MO.getReg()); } } }; @@ -335,7 +335,7 @@ static void collectOperands(const MachineInstr *MI, PhysRegOperands &PhysRegOpers, VirtRegOperands &VirtRegOpers, const TargetRegisterInfo *TRI, - const RegisterClassInfo *RCI) { + const MachineRegisterInfo *MRI) { for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) { const MachineOperand &MO = *OperI; if (!MO.isReg() || !MO.getReg()) @@ -343,7 +343,7 @@ static void collectOperands(const MachineInstr *MI, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) VirtRegOpers.collect(MO, TRI); - else if (RCI->isAllocatable(MO.getReg())) + else if (MRI->isAllocatable(MO.getReg())) PhysRegOpers.collect(MO, TRI); } // Remove redundant physreg dead defs. 
@@ -449,7 +449,7 @@ bool RegPressureTracker::recede() { PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI); // Boost pressure for all dead defs together. increasePhysRegPressure(PhysRegOpers.DeadDefs); @@ -522,7 +522,7 @@ bool RegPressureTracker::advance() { PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI); // Kill liveness at last uses. for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { @@ -664,7 +664,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { // Account for register pressure similar to RegPressureTracker::recede(). PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI); // Boost max pressure for all dead defs together. // Since CurrSetPressure and MaxSetPressure @@ -674,9 +674,16 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { decreaseVirtRegPressure(VirtRegOpers.DeadDefs); // Kill liveness at live defs. - decreasePhysRegPressure(PhysRegOpers.Defs); - decreaseVirtRegPressure(VirtRegOpers.Defs); - + for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Defs[i]; + if (!findReg(Reg, false, PhysRegOpers.Uses, TRI)) + decreasePhysRegPressure(PhysRegOpers.Defs); + } + for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Defs[i]; + if (!findReg(Reg, true, VirtRegOpers.Uses, TRI)) + decreaseVirtRegPressure(VirtRegOpers.Defs); + } // Generate liveness for uses. 
for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { unsigned Reg = PhysRegOpers.Uses[i]; @@ -750,7 +757,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { // Account for register pressure similar to RegPressureTracker::recede(). PhysRegOperands PhysRegOpers; VirtRegOperands VirtRegOpers; - collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI); + collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI); // Kill liveness at last uses. Assume allocatable physregs are single-use // rather than checking LiveIntervals. diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index d673794..5ec6564 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -92,9 +92,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { KillRegs.resize(NumPhysRegs); DefRegs.resize(NumPhysRegs); - // Create reserved registers bitvector. - ReservedRegs = TRI->getReservedRegs(MF); - // Create callee-saved registers bitvector. CalleeSavedRegs.resize(NumPhysRegs); const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); @@ -225,9 +222,9 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { used = RegsAvailable; used.flip(); if (includeReserved) - used |= ReservedRegs; + used |= MRI->getReservedRegs(); else - used.reset(ReservedRegs); + used.reset(MRI->getReservedRegs()); } unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp index 752f8e4..9a65071 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -279,6 +279,7 @@ void SUnit::ComputeHeight() { } while (!WorkList.empty()); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. 
void SUnit::dump(const ScheduleDAG *G) const { @@ -336,6 +337,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { } dbgs() << "\n"; } +#endif #ifndef NDEBUG /// VerifyScheduledDAG - Verify that all SUnits were scheduled and that diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 9c1dba3..a4d4a93 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAGILP.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" @@ -30,6 +31,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallPtrSet.h" @@ -44,14 +46,15 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineDominatorTree &mdt, bool IsPostRAFlag, LiveIntervals *lis) - : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), - InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis), - IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false), - LoopRegs(MDT), FirstDbgValue(0) { + : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis), + IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && "Virtual registers must be removed prior to PostRA scheduling"); + + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + SchedModel.init(*ST.getSchedModel(), &ST, TII); } /// getUnderlyingObjectFromInt - This is the function that does 
the work of @@ -68,7 +71,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { // object. We don't have to worry about the case where the // object address is somehow being computed by the multiply, // because our callers only care when the result is an - // identifibale object. + // identifiable object. if (U->getOpcode() != Instruction::Add || (!isa<ConstantInt>(U->getOperand(1)) && Operator::getOpcode(U->getOperand(1)) != Instruction::Mul)) @@ -135,10 +138,6 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { BB = bb; - LoopRegs.Deps.clear(); - if (MachineLoop *ML = MLI.getLoopFor(BB)) - if (BB == ML->getLoopLatch()) - LoopRegs.VisitLoop(ML); } void ScheduleDAGInstrs::finishBlock() { @@ -174,9 +173,6 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, EndIndex = endcount; MISUnitMap.clear(); - // Check to see if the scheduler cares about latencies. - UnitLatencies = forceUnitLatencies(); - ScheduleDAG::clearDAG(); } @@ -209,7 +205,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (Reg == 0) continue; if (TRI->isPhysicalRegister(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); else { assert(!IsPostRA && "Virtual register encountered after regalloc."); addVRegUseDeps(&ExitSU, i); @@ -225,59 +221,44 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; if (!Uses.contains(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); } } } /// MO is an operand of SU's instruction that defines a physical register. Add /// data dependencies from SU to any uses of the physical register. 
-void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, - const MachineOperand &MO) { +void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { + const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); assert(MO.isDef() && "expect physreg def"); // Ask the target if address-backscheduling is desirable, and if so how much. const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); - unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); - unsigned DataLatency = SU->Latency; for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); Alias.isValid(); ++Alias) { if (!Uses.contains(*Alias)) continue; - std::vector<SUnit*> &UseList = Uses[*Alias]; + std::vector<PhysRegSUOper> &UseList = Uses[*Alias]; for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; + SUnit *UseSU = UseList[i].SU; if (UseSU == SU) continue; - unsigned LDataLatency = DataLatency; - // Optionally add in a special extra latency for nodes that - // feed addresses. - // TODO: Perhaps we should get rid of - // SpecialAddressLatency and just move this into - // adjustSchedDependency for the targets that care about it. - if (SpecialAddressLatency != 0 && !UnitLatencies && - UseSU != &ExitSU) { - MachineInstr *UseMI = UseSU->getInstr(); - const MCInstrDesc &UseMCID = UseMI->getDesc(); - int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias); - assert(RegUseIndex >= 0 && "UseMI doesn't use register!"); - if (RegUseIndex >= 0 && - (UseMI->mayLoad() || UseMI->mayStore()) && - (unsigned)RegUseIndex < UseMCID.getNumOperands() && - UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) - LDataLatency += SpecialAddressLatency; - } - // Adjust the dependence latency using operand def/use - // information (if any), and then allow the target to - // perform its own adjustments. 
- SDep dep(SU, SDep::Data, LDataLatency, *Alias); - if (!UnitLatencies) { - unsigned Latency = computeOperandLatency(SU, UseSU, dep); - dep.setLatency(Latency); - - ST.adjustSchedDependency(SU, UseSU, dep); - } + + SDep dep(SU, SDep::Data, *Alias); + + // Adjust the dependence latency using operand def/use information, + // then allow the target to perform its own adjustments. + int UseOp = UseList[i].OpIdx; + MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr(); + dep.setLatency( + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, + RegUse, UseOp, /*FindMin=*/false)); + dep.setMinLatency( + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, + RegUse, UseOp, /*FindMin=*/true)); + + ST.adjustSchedDependency(SU, UseSU, dep); UseSU->addPred(dep); } } @@ -301,20 +282,23 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; - std::vector<SUnit *> &DefList = Defs[*Alias]; + std::vector<PhysRegSUOper> &DefList = Defs[*Alias]; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i]; + SUnit *DefSU = DefList[i].SU; if (DefSU == &ExitSU) continue; if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(*Alias))) { if (Kind == SDep::Anti) - DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias)); + DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); else { - unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx, - DefSU->getInstr()); - DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias)); + SDep Dep(SU, Kind, /*Reg=*/*Alias); + unsigned OutLatency = + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); + Dep.setMinLatency(OutLatency); + Dep.setLatency(OutLatency); + DefSU->addPred(Dep); } } } @@ -324,61 +308,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for 
this register's uses. // Push this SUnit on the use list. - Uses[MO.getReg()].push_back(SU); + Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx)); } else { - addPhysRegDataDeps(SU, MO); + addPhysRegDataDeps(SU, OperIdx); // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's defs. - std::vector<SUnit *> &DefList = Defs[MO.getReg()]; - - // If a def is going to wrap back around to the top of the loop, - // backschedule it. - if (!UnitLatencies && DefList.empty()) { - LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg()); - if (I != LoopRegs.Deps.end()) { - const MachineOperand *UseMO = I->second.first; - unsigned Count = I->second.second; - const MachineInstr *UseMI = UseMO->getParent(); - unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); - const MCInstrDesc &UseMCID = UseMI->getDesc(); - const TargetSubtargetInfo &ST = - TM.getSubtarget<TargetSubtargetInfo>(); - unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); - // TODO: If we knew the total depth of the region here, we could - // handle the case where the whole loop is inside the region but - // is large enough that the isScheduleHigh trick isn't needed. - if (UseMOIdx < UseMCID.getNumOperands()) { - // Currently, we only support scheduling regions consisting of - // single basic blocks. Check to see if the instruction is in - // the same region by checking to see if it has the same parent. - if (UseMI->getParent() != MI->getParent()) { - unsigned Latency = SU->Latency; - if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) - Latency += SpecialAddressLatency; - // This is a wild guess as to the portion of the latency which - // will be overlapped by work done outside the current - // scheduling region. - Latency -= std::min(Latency, Count); - // Add the artificial edge. 
- ExitSU.addPred(SDep(SU, SDep::Order, Latency, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); - } else if (SpecialAddressLatency > 0 && - UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { - // The entire loop body is within the current scheduling region - // and the latency of this operation is assumed to be greater - // than the latency of the loop. - // TODO: Recursively mark data-edge predecessors as - // isScheduleHigh too. - SU->isScheduleHigh = true; - } - } - LoopRegs.Deps.erase(I); - } - } + std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()]; // clear this register's use list if (Uses.contains(MO.getReg())) @@ -393,11 +330,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // the block. Instead, we leave only one call at the back of the // DefList. if (SU->isCall) { - while (!DefList.empty() && DefList.back()->isCall) + while (!DefList.empty() && DefList.back().SU->isCall) DefList.pop_back(); } // Defs are pushed in the order they are visited and never reordered. - DefList.push_back(SU); + DefList.push_back(PhysRegSUOper(SU, OperIdx)); } } @@ -430,9 +367,12 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { else { SUnit *DefSU = DefI->SU; if (DefSU != SU && DefSU != &ExitSU) { - unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx, - DefSU->getInstr()); - DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg)); + SDep Dep(SU, SDep::Output, Reg); + unsigned OutLatency = + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); + Dep.setMinLatency(OutLatency); + Dep.setLatency(OutLatency); + DefSU->addPred(Dep); } DefI->SU = SU; } @@ -462,18 +402,17 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { if (DefSU) { // The reaching Def lives within this scheduling region. // Create a data dependence. - // - // TODO: Handle "special" address latencies cleanly. 
- SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg); - if (!UnitLatencies) { - // Adjust the dependence latency using operand def/use information, then - // allow the target to perform its own adjustments. - unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); - dep.setLatency(Latency); - - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); - ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); - } + SDep dep(DefSU, SDep::Data, Reg); + // Adjust the dependence latency using operand def/use information, then + // allow the target to perform its own adjustments. + int DefOp = Def->findRegisterDefOperandIdx(Reg); + dep.setLatency( + SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false)); + dep.setMinLatency( + SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true)); + + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); SU->addPred(dep); } } @@ -481,14 +420,14 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Add antidependence to the following def of the vreg it uses. VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); if (DefI != VRegDefs.end() && DefI->SU != SU) - DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg)); + DefI->SU->addPred(SDep(SU, SDep::Anti, Reg)); } /// Return true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { if (MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasVolatileMemoryRef() && + (MI->hasOrderedMemoryRef() && (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) return true; return false; @@ -621,8 +560,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, // and stop descending. 
if (*Depth > 200 || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { - SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0, - /*isNormalMemory=*/true)); + SUb->addPred(SDep(SUa, SDep::MayAliasMem)); return *Depth; } // Track current depth. @@ -653,9 +591,9 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, if (SU == *I) continue; if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) { - unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0; - (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0, - /*isNormalMemory=*/true)); + SDep Dep(SU, SDep::MayAliasMem); + Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0); + (*I)->addPred(Dep); } // Now go through all the chain successors and iterate from them. // Keep track of visited nodes. @@ -678,9 +616,11 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, // If this is a false dependency, // do not add the edge, but rememeber the rejected node. if (!EnableAASchedMI || - MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) - SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0, - isNormalMemory)); + MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); + Dep.setLatency(TrueMemOrderLatency); + SUb->addPred(Dep); + } else { // Duplicate entries should be ignored. RejectList.insert(SUb); @@ -718,10 +658,7 @@ void ScheduleDAGInstrs::initSUnits() { SU->isCommutable = MI->isCommutable(); // Assign the Latency field of SU using target-provided information. - if (UnitLatencies) - SU->Latency = 1; - else - computeLatency(SU); + SU->Latency = SchedModel.computeInstrLatency(SU->getInstr()); } } @@ -825,16 +762,19 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // references, even those that are known to not alias. 
for (std::map<const Value *, SUnit *>::iterator I = NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { - I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + I->second->addPred(SDep(SU, SDep::Barrier)); } for (std::map<const Value *, std::vector<SUnit *> >::iterator I = NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { - for (unsigned i = 0, e = I->second.size(); i != e; ++i) - I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + for (unsigned i = 0, e = I->second.size(); i != e; ++i) { + SDep Dep(SU, SDep::Barrier); + Dep.setLatency(TrueMemOrderLatency); + I->second[i]->addPred(Dep); + } } // Add SU to the barrier chain. if (BarrierChain) - BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + BarrierChain->addPred(SDep(SU, SDep::Barrier)); BarrierChain = SU; // This is a barrier event that acts as a pivotal node in the DAG, // so it is safe to clear list of exposed nodes. @@ -922,7 +862,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // SU and barrier _could_ be reordered, they should not. In addition, // we have lost all RejectMemNodes below barrier. if (BarrierChain) - BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + BarrierChain->addPred(SDep(SU, SDep::Barrier)); } else { // Treat all other stores conservatively. goto new_alias_chain; @@ -931,10 +871,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (!ExitSU.isPred(SU)) // Push store's up a bit to avoid them getting in between cmp // and branches. 
- ExitSU.addPred(SDep(SU, SDep::Order, 0, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); + ExitSU.addPred(SDep(SU, SDep::Artificial)); } else if (MI->mayLoad()) { bool MayAlias = true; if (MI->isInvariantLoad(AA)) { @@ -969,7 +906,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (MayAlias && AliasChain) addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); if (BarrierChain) - BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + BarrierChain->addPred(SDep(SU, SDep::Barrier)); } } } @@ -982,34 +919,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, PendingLoads.clear(); } -void ScheduleDAGInstrs::computeLatency(SUnit *SU) { - // Compute the latency for the node. We only provide a default for missing - // itineraries. Empty itineraries still have latency properties. - if (!InstrItins) { - SU->Latency = 1; - - // Simplistic target-independent heuristic: assume that loads take - // extra time. - if (SU->getInstr()->mayLoad()) - SU->Latency += 2; - } else { - SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr()); - } -} - -unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, - const SDep& dep, - bool FindMin) const { - // For a data dependency with a known register... - if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) - return 1; - - return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(), - Use->getInstr(), dep.getReg(), FindMin); -} - void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) SU->getInstr()->dump(); +#endif } std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { @@ -1029,3 +942,94 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { std::string ScheduleDAGInstrs::getDAGName() const { return "dag." + BB->getFullName(); } + +namespace { +/// \brief Manage the stack used by a reverse depth-first search over the DAG. 
+class SchedDAGReverseDFS { + std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack; +public: + bool isComplete() const { return DFSStack.empty(); } + + void follow(const SUnit *SU) { + DFSStack.push_back(std::make_pair(SU, SU->Preds.begin())); + } + void advance() { ++DFSStack.back().second; } + + void backtrack() { DFSStack.pop_back(); } + + const SUnit *getCurr() const { return DFSStack.back().first; } + + SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; } + + SUnit::const_pred_iterator getPredEnd() const { + return getCurr()->Preds.end(); + } +}; +} // anonymous + +void ScheduleDAGILP::resize(unsigned NumSUnits) { + ILPValues.resize(NumSUnits); +} + +ILPValue ScheduleDAGILP::getILP(const SUnit *SU) { + return ILPValues[SU->NodeNum]; +} + +// A leaf node has an ILP of 1/1. +static ILPValue initILP(const SUnit *SU) { + unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1; + return ILPValue(Cnt, 1 + SU->getDepth()); +} + +/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first +/// search from this root. +void ScheduleDAGILP::computeILP(const SUnit *Root) { + if (!IsBottomUp) + llvm_unreachable("Top-down ILP metric is unimplemnted"); + + SchedDAGReverseDFS DFS; + // Mark a node visited by validating it. + ILPValues[Root->NodeNum] = initILP(Root); + DFS.follow(Root); + for (;;) { + // Traverse the leftmost path as far as possible. + while (DFS.getPred() != DFS.getPredEnd()) { + const SUnit *PredSU = DFS.getPred()->getSUnit(); + DFS.advance(); + // If the pred is already valid, skip it. + if (ILPValues[PredSU->NodeNum].isValid()) + continue; + ILPValues[PredSU->NodeNum] = initILP(PredSU); + DFS.follow(PredSU); + } + // Visit the top of the stack in postorder and backtrack. + unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount; + DFS.backtrack(); + if (DFS.isComplete()) + break; + // Add the recently finished predecessor's bottom-up descendent count. 
+ ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ILPValue::print(raw_ostream &OS) const { + if (!isValid()) + OS << "BADILP"; + OS << InstrCount << " / " << Cycles << " = " + << format("%g", ((double)InstrCount / Cycles)); +} + +void ILPValue::dump() const { + dbgs() << *this << '\n'; +} + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { + Val.print(OS); + return OS; +} + +} // namespace llvm +#endif // !NDEBUG || LLVM_ENABLE_DUMP diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index 38feee9..6e781b1 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Constants.h" -#include "llvm/Function.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -35,7 +34,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const ScheduleDAG *G) { - return G->MF.getFunction()->getName(); + return G->MF.getName(); } static bool renderGraphFromBottomUp() { diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index e675366..2cd84d6 100644 --- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -89,6 +89,7 @@ void ScoreboardHazardRecognizer::Reset() { ReservedScoreboard.reset(); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ScoreboardHazardRecognizer::Scoreboard::dump() const { dbgs() << "Scoreboard:\n"; @@ -104,6 +105,7 @@ void ScoreboardHazardRecognizer::Scoreboard::dump() const { dbgs() << '\n'; } } +#endif bool 
ScoreboardHazardRecognizer::atIssueLimit() const { if (IssueWidth == 0) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4e29879..37d7731 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -23,7 +23,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -194,6 +194,7 @@ namespace { SDValue visitOR(SDNode *N); SDValue visitXOR(SDNode *N); SDValue SimplifyVBinOp(SDNode *N); + SDValue SimplifyVUnaryOp(SDNode *N); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); @@ -269,6 +270,8 @@ namespace { SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue TransformFPLoadStorePair(SDNode *N); + SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); + SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -300,6 +303,11 @@ namespace { /// looking for a better chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); + /// Merge consecutive store operations into a wide store. + /// This optimization uses wide integers or vectors when possible. + /// \return True if some memory operations were changed. + bool MergeConsecutiveStores(StoreSDNode *N); + public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), @@ -385,10 +393,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, unsigned Depth = 0) { - // No compile time optimizations on this type. 
- if (Op.getValueType() == MVT::ppcf128) - return 0; - // fneg is removable even if it has multiple uses. if (Op.getOpcode() == ISD::FNEG) return 2; @@ -413,7 +417,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) return 0; - // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, Depth + 1)) return V; @@ -1643,7 +1647,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return N0.getOperand(0); // fold C2-(A+C1) -> (C2-C1)-A if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { - SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT); + SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), + VT); return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, N1.getOperand(0)); } @@ -2345,16 +2350,19 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // we don't want to undo this promotion. // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper // on scalars. - if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) - && Level == AfterLegalizeTypes) { + if ((N0.getOpcode() == ISD::BITCAST || + N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && + Level == AfterLegalizeTypes) { SDValue In0 = N0.getOperand(0); SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); EVT In1Ty = In1.getValueType(); - // If both incoming values are integers, and the original types are the same. + DebugLoc DL = N->getDebugLoc(); + // If both incoming values are integers, and the original types are the + // same. 
if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { - SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1); - SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op); + SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); + SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); AddToWorkList(Op.getNode()); return BC; } @@ -2496,8 +2504,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // lanes of the constant together. EVT VT = Vector->getValueType(0); unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + + // If the splat value has been compressed to a bitlength lower + // than the size of the vector lane, we need to re-expand it to + // the lane size. + if (BitWidth > SplatBitSize) + for (SplatValue = SplatValue.zextOrTrunc(BitWidth); + SplatBitSize < BitWidth; + SplatBitSize = SplatBitSize * 2) + SplatValue |= SplatValue.shl(SplatBitSize); + Constant = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); } } @@ -2984,7 +3002,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); - else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), @@ -3202,11 +3220,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if ((LShVal + RShVal) != OpSizeInBits) return 0; - SDValue Rot; - if (HasROTL) - Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt); - else - Rot = DAG.getNode(ISD::ROTR, DL, 
VT, LHSShiftArg, RHSShiftAmt); + SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { @@ -3239,12 +3254,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { if (SUBC->getAPIntValue() == OpSizeInBits) { - if (HasROTL) - return DAG.getNode(ISD::ROTL, DL, VT, - LHSShiftArg, LHSShiftAmt).getNode(); - else - return DAG.getNode(ISD::ROTR, DL, VT, - LHSShiftArg, RHSShiftAmt).getNode(); + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); } } } @@ -3256,25 +3267,21 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { if (SUBC->getAPIntValue() == OpSizeInBits) { - if (HasROTR) - return DAG.getNode(ISD::ROTR, DL, VT, - LHSShiftArg, RHSShiftAmt).getNode(); - else - return DAG.getNode(ISD::ROTL, DL, VT, - LHSShiftArg, LHSShiftAmt).getNode(); + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, + HasROTR ? 
RHSShiftAmt : LHSShiftAmt).getNode(); } } } // Look for sign/zext/any-extended or truncate cases: - if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND - || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND - || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND - || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && - (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND - || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND - || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND - || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { + if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && + (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { SDValue LExtOp0 = LHSShiftAmt.getOperand(0); SDValue RExtOp0 = RHSShiftAmt.getOperand(0); if (RExtOp0.getOpcode() == ISD::SUB && @@ -4046,7 +4053,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT.isInteger() && (VT0 == MVT::i1 || (VT0.isInteger() && - TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) && + TLI.getBooleanContents(false) == + TargetLowering::ZeroOrOneBooleanContent)) && N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) @@ -4412,20 +4420,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend - else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + 
EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); - if (SVT == MatchingVectorType) { - SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); - } + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -5235,13 +5241,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // if the source is smaller than the dest, we still need an extend return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); - else if (N0.getOperand(0).getValueType().bitsGT(VT)) + if (N0.getOperand(0).getValueType().bitsGT(VT)) // if the source is larger than the dest, than we just need the truncate return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); - else - // if the source and dest are the same type, we can drop both the extend - // and the truncate. - return N0.getOperand(0); + // if the source and dest are the same type, we can drop both the extend + // and the truncate. + return N0.getOperand(0); } // Fold extract-and-trunc into a narrow extract. For example: @@ -5301,6 +5306,48 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (Reduced.getNode()) return Reduced; } + // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), + // where ... are all 'undef'. + if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { + SmallVector<EVT, 8> VTs; + SDValue V; + unsigned Idx = 0; + unsigned NumDefs = 0; + + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { + SDValue X = N0.getOperand(i); + if (X.getOpcode() != ISD::UNDEF) { + V = X; + Idx = i; + NumDefs++; + } + // Stop if more than one members are non-undef. 
+ if (NumDefs > 1) + break; + VTs.push_back(EVT::getVectorVT(*DAG.getContext(), + VT.getVectorElementType(), + X.getValueType().getVectorNumElements())); + } + + if (NumDefs == 0) + return DAG.getUNDEF(VT); + + if (NumDefs == 1) { + assert(V.getNode() && "The single defined operand is empty!"); + SmallVector<SDValue, 8> Opnds; + for (unsigned i = 0, e = VTs.size(); i != e; ++i) { + if (i != Idx) { + Opnds.push_back(DAG.getUNDEF(VTs[i])); + continue; + } + SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V); + AddToWorkList(NV.getNode()); + Opnds.push_back(NV); + } + return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + &Opnds[0], Opnds.size()); + } + } // Simplify the operands using demanded-bits information. if (!VT.isVector() && @@ -5338,7 +5385,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { !LD2->isVolatile() && DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { unsigned Align = LD1->getAlignment(); - unsigned NewAlign = TLI.getTargetData()-> + unsigned NewAlign = TLI.getDataLayout()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && @@ -5407,7 +5454,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { !cast<LoadSDNode>(N0)->isVolatile() && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - unsigned Align = TLI.getTargetData()-> + unsigned Align = TLI.getDataLayout()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); @@ -5430,7 +5477,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // This often reduces constant pool loads. 
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && - N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { + N0.getNode()->hasOneUse() && VT.isInteger() && + !VT.isVector() && !N0.getValueType().isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); AddToWorkList(NewConv.getNode()); @@ -5653,7 +5701,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } // fold (fadd c1, c2) -> c1 + c2 - if (N0CFP && N1CFP && VT != MVT::ppcf128) + if (N0CFP && N1CFP) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) @@ -5664,12 +5712,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return N0; // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) + isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) + isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); @@ -5681,6 +5729,139 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); + // If allow, fold (fadd (fneg x), x) -> 0.0 + if (DAG.getTarget().Options.UnsafeFPMath && + N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { + return DAG.getConstantFP(0.0, VT); + } + + // If allow, fold (fadd x, (fneg x)) -> 0.0 + if (DAG.getTarget().Options.UnsafeFPMath && + N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == 
N0) { + return DAG.getConstantFP(0.0, VT); + } + + // In unsafe math mode, we can fold chains of FADD's of the same value + // into multiplications. This transform is not safe in general because + // we are reducing the number of rounding steps. + if (DAG.getTarget().Options.UnsafeFPMath && + TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && + !N0CFP && !N1CFP) { + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + + // (fadd (fmul c, x), x) -> (fmul c+1, x) + if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fmul x, c), x) -> (fmul c+1, x) + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fadd x, x), x) -> (fmul 3.0, x) + if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) && + N0.getOperand(0) == N1) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, DAG.getConstantFP(3.0, VT)); + } + + // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, 
N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + if (N1.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); + + // (fadd x, (fmul c, x)) -> (fmul c+1, x) + if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + // (fadd x, (fmul x, c)) -> (fmul c+1, x) + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, DAG.getConstantFP(3.0, VT)); + } + + // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) + if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) + if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + // (fadd (fadd x, x), (fadd x, x)) 
-> (fmul 4.0, x) + if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), + DAG.getConstantFP(4.0, VT)); + } + } + // FADD -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && @@ -5692,8 +5873,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), N1); } - - // fold (fadd x, (fmul y, z)) -> (fma x, y, z) + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, @@ -5719,7 +5900,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub c1, c2) -> c1-c2 - if (N0CFP && N1CFP && VT != MVT::ppcf128) + if (N0CFP && N1CFP) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); // fold (fsub A, 0) -> A if (DAG.getTarget().Options.UnsafeFPMath && @@ -5811,7 +5992,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } // fold (fmul c1, c2) -> c1*c2 - if (N0CFP && N1CFP && VT != MVT::ppcf128) + if (N0CFP && N1CFP) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) @@ -5867,7 +6048,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + if (DAG.getTarget().Options.UnsafeFPMath) { + if (N0CFP && N0CFP->isZero()) + return N2; + if (N1CFP && N1CFP->isZero()) + return N2; + } if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) @@ -5877,6 +6065,58 @@ SDValue 
DAGCombiner::visitFMA(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FMUL && + N0 == N2.getOperand(0) && + N2.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); + } + + + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (DAG.getTarget().Options.UnsafeFPMath && + N0.getOpcode() == ISD::FMUL && N1CFP && + N0.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), + N2); + } + + // (fma x, 1, y) -> (fadd x, y) + // (fma x, -1, y) -> (fadd (fneg x), y) + if (N1CFP) { + if (N1CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, dl, VT, N0, N2); + + if (N1CFP->isExactlyValue(-1.0) && + (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { + SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); + AddToWorkList(RHSNeg.getNode()); + return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); + } + } + + // (fma x, c, x) -> (fmul x, (c+1)) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, VT))); + } + + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, VT))); + } + + return SDValue(); } @@ -5895,11 +6135,11 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // fold (fdiv c1, c2) -> c1/c2 - if (N0CFP && N1CFP && VT != MVT::ppcf128) + if (N0CFP && N1CFP) return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); // fold (fdiv X, c2) -> fmul X, 1/c2 if 
losing precision is acceptable. - if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) { + if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { // Compute the reciprocal 1.0 / c2. APFloat N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 @@ -5942,7 +6182,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { EVT VT = N->getValueType(0); // fold (frem c1, c2) -> fmod(c1,c2) - if (N0CFP && N1CFP && VT != MVT::ppcf128) + if (N0CFP && N1CFP) return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); return SDValue(); @@ -5955,7 +6195,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold + if (N0CFP && N1CFP) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); if (N1CFP) { @@ -6005,7 +6245,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128 && + if (N0C && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -6062,7 +6302,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128 && + if (N0C && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -6117,7 +6357,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); return SDValue(); @@ -6130,7 +6370,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp - if (N0CFP && N0.getValueType() != 
MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1); // fold (fp_round (fp_extend x)) -> x @@ -6184,7 +6424,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); // fold (fp_extend c1fp) -> c1fp - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the @@ -6225,6 +6465,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVUnaryOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); @@ -6246,6 +6491,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { } } + // (fneg (fmul c, x)) -> (fmul -c, x) + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + if (CFP1) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), + DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + N0.getOperand(1))); + } + } + return SDValue(); } @@ -6255,7 +6511,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); return SDValue(); @@ -6267,7 +6523,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); return SDValue(); @@ -6279,7 +6535,7 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) { EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); return 
SDValue(); @@ -6290,8 +6546,13 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVUnaryOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (fabs c1) -> fabs(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) @@ -6511,7 +6772,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, } else return false; - TargetLowering::AddrMode AM; + AddrMode AM; if (N->getOpcode() == ISD::ADD) { ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (Offset) @@ -7138,7 +7399,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); - if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy)) + if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), @@ -7200,7 +7461,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { unsigned LDAlign = LD->getAlignment(); unsigned STAlign = ST->getAlignment(); Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy); + unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); @@ -7225,6 +7486,433 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } +/// Returns the base pointer and an integer offset from that object. 
+static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) { + if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) { + int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); + SDValue Base = Ptr->getOperand(0); + return std::make_pair(Base, Offset); + } + + return std::make_pair(Ptr, 0); +} + +/// Holds a pointer to an LSBaseSDNode as well as information on where it +/// is located in a sequence of memory operations connected by a chain. +struct MemOpLink { + MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): + MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } + // Ptr to the mem node. + LSBaseSDNode *MemNode; + // Offset from the base ptr. + int64_t OffsetFromBase; + // What is the sequence number of this mem node. + // Lowest mem operand in the DAG starts at zero. + unsigned SequenceNum; +}; + +/// Sorts store nodes in a link according to their offset from a shared +// base ptr. +struct ConsecutiveMemoryChainSorter { + bool operator()(MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase; + } +}; + +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + EVT MemVT = St->getMemoryVT(); + int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; + + // Don't merge vectors into wider inputs. + if (MemVT.isVector() || !MemVT.isSimple()) + return false; + + // Perform an early exit check. Do not bother looking at stored values that + // are not constants or loads. + SDValue StoredVal = St->getValue(); + bool IsLoadSrc = isa<LoadSDNode>(StoredVal); + if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && + !IsLoadSrc) + return false; + + // Only look at ends of store sequences. + SDValue Chain = SDValue(St, 1); + if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) + return false; + + // This holds the base pointer and the offset in bytes from the base pointer. 
+ std::pair<SDValue, int64_t> BasePtr = + GetPointerBaseAndOffset(St->getBasePtr()); + + // We must have a base and an offset. + if (!BasePtr.first.getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.first.getOpcode() == ISD::UNDEF) + return false; + + SmallVector<MemOpLink, 8> StoreNodes; + // Walk up the chain and look for nodes with offsets from the same + // base pointer. Stop when reaching an instruction with a different kind + // or instruction which has a different base pointer. + unsigned Seq = 0; + StoreSDNode *Index = St; + while (Index) { + // If the chain has more than one use, then we can't reorder the mem ops. + if (Index != St && !SDValue(Index, 1)->hasOneUse()) + break; + + // Find the base pointer and offset for this memory node. + std::pair<SDValue, int64_t> Ptr = + GetPointerBaseAndOffset(Index->getBasePtr()); + + // Check that the base pointer is the same as the original one. + if (Ptr.first.getNode() != BasePtr.first.getNode()) + break; + + // Check that the alignment is the same. + if (Index->getAlignment() != St->getAlignment()) + break; + + // The memory operands must not be volatile. + if (Index->isVolatile() || Index->isIndexed()) + break; + + // No truncation. + if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) + if (St->isTruncatingStore()) + break; + + // The stored memory type must be the same. + if (Index->getMemoryVT() != MemVT) + break; + + // We do not allow unaligned stores because we want to prevent overriding + // stores. + if (Index->getAlignment()*8 != MemVT.getSizeInBits()) + break; + + // We found a potential memory operand to merge. + StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++)); + + // Move up the chain to the next memory operation. + Index = dyn_cast<StoreSDNode>(Index->getChain().getNode()); + } + + // Check if there is anything to merge. + if (StoreNodes.size() < 2) + return false; + + // Sort the memory operands according to their distance from the base pointer. 
+ std::sort(StoreNodes.begin(), StoreNodes.end(), + ConsecutiveMemoryChainSorter()); + + // Scan the memory operations on the chain and find the first non-consecutive + // store memory address. + unsigned LastConsecutiveStore = 0; + int64_t StartAddress = StoreNodes[0].OffsetFromBase; + for (unsigned i=1; i<StoreNodes.size(); ++i) { + int64_t CurrAddress = StoreNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + + // Mark this node as useful. + LastConsecutiveStore = i; + } + + // The node with the lowest store address. + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + + // Store the constants into memory as one consecutive store. + if (!IsLoadSrc) { + unsigned LastLegalType = 0; + unsigned LastLegalVectorType = 0; + bool NonZero = false; + for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = St->getValue(); + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { + NonZero |= !C->isNullValue(); + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { + NonZero |= !C->getConstantFPValue()->isNullValue(); + } else { + // Non constant. + break; + } + + // Find a legal type for the constant store. + unsigned StoreBW = (i+1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + if (TLI.isTypeLegal(StoreTy)) + LastLegalType = i+1; + + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(Ty)) + LastLegalVectorType = i + 1; + } + + // We only use vectors if the constant is known to be zero. + if (NonZero) + LastLegalVectorType = 0; + + // Check if we found a legal integer type to store. + if (LastLegalType == 0 && LastLegalVectorType == 0) + return false; + + bool UseVector = LastLegalVectorType > LastLegalType; + unsigned NumElem = UseVector ? 
LastLegalVectorType : LastLegalType; + + // Make sure we have something to merge. + if (NumElem < 2) + return false; + + unsigned EarliestNodeUsed = 0; + for (unsigned i=0; i < NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // earliest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) + EarliestNodeUsed = i; + } + + // The earliest Node in the DAG. + LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc(); + + SDValue StoredVal; + if (UseVector) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + StoredVal = DAG.getConstant(0, Ty); + } else { + unsigned StoreBW = NumElem * ElementSizeBytes * 8; + APInt StoreInt(StoreBW, 0); + + // Construct a single integer constant which is made of the smaller + // constant inputs. + bool IsLE = TLI.isLittleEndian(); + for (unsigned i = 0; i < NumElem ; ++i) { + unsigned Idx = IsLE ?(NumElem - 1 - i) : i; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); + SDValue Val = St->getValue(); + StoreInt<<=ElementSizeBytes*8; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { + StoreInt|=C->getAPIntValue().zext(StoreBW); + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { + StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); + } else { + assert(false && "Invalid constant element type"); + } + } + + // Create the new Load and Store operations. 
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + StoredVal = DAG.getConstant(StoreInt, StoreTy); + } + + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + false, false, + FirstInChain->getAlignment()); + + // Replace the first store with the new store + CombineTo(EarliestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + if (StoreNodes[i].MemNode == EarliestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + // ReplaceAllUsesWith will replace all uses that existed when it was + // called, but graph optimizations may cause new ones to appear. For + // example, the case in pr14333 looks like + // + // St's chain -> St -> another store -> X + // + // And the only difference from St to the other store is the chain. + // When we change it's chain to be St's chain they become identical, + // get CSEed and the net result is that X is now a use of St. + // Since we know that St is redundant, just iterate. + while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + removeFromWorkList(St); + DAG.DeleteNode(St); + } + + return true; + } + + // Below we handle the case of multiple consecutive stores that + // come from multiple consecutive loads. We merge them into a single + // wide load and a single wide store. + + // Look for load nodes which are used by the stored values. + SmallVector<MemOpLink, 8> LoadNodes; + + // Find acceptable loads. Loads need to have the same chain (token factor), + // must not be zext, volatile, indexed, and they must be consecutive. + SDValue LdBasePtr; + for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); + if (!Ld) break; + + // Loads must only have one use. 
+ if (!Ld->hasNUsesOfValue(1, 0)) + break; + + // Check that the alignment is the same as the stores. + if (Ld->getAlignment() != St->getAlignment()) + break; + + // The memory operands must not be volatile. + if (Ld->isVolatile() || Ld->isIndexed()) + break; + + // We do not accept ext loads. + if (Ld->getExtensionType() != ISD::NON_EXTLOAD) + break; + + // The stored memory type must be the same. + if (Ld->getMemoryVT() != MemVT) + break; + + std::pair<SDValue, int64_t> LdPtr = + GetPointerBaseAndOffset(Ld->getBasePtr()); + + // If this is not the first ptr that we check. + if (LdBasePtr.getNode()) { + // The base ptr must be the same. + if (LdPtr.first != LdBasePtr) + break; + } else { + // Check that all other base pointers are the same as this one. + LdBasePtr = LdPtr.first; + } + + // We found a potential memory operand to merge. + LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0)); + } + + if (LoadNodes.size() < 2) + return false; + + // Scan the memory operations on the chain and find the first non-consecutive + // load memory address. These variables hold the index in the store node + // array. + unsigned LastConsecutiveLoad = 0; + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 0; + unsigned LastLegalIntegerType = 0; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue FirstChain = LoadNodes[0].MemNode->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads much share the same chain. + if (LoadNodes[i].MemNode->getChain() != FirstChain) + break; + + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; + + // Find a legal type for the vector store. + EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(StoreTy)) + LastLegalVectorType = i + 1; + + // Find a legal type for the integer store. 
+ unsigned StoreBW = (i+1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + if (TLI.isTypeLegal(StoreTy)) + LastLegalIntegerType = i + 1; + } + + // Only use vector types if the vector type is larger than the integer type. + // If they are the same, use integers. + bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType; + unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); + + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. + unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; + NumElem = std::min(LastLegalType, NumElem); + + if (NumElem < 2) + return false; + + // The earliest Node in the DAG. + unsigned EarliestNodeUsed = 0; + LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + for (unsigned i=1; i<NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // earliest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) + EarliestNodeUsed = i; + } + + // Find if it is better to use vectors or integers to load and store + // to memory. 
+ EVT JointMemOpVT; + if (UseVectorTy) { + JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + } else { + unsigned StoreBW = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + } + + DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc(); + DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc(); + + LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); + SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, + FirstLoad->getChain(), + FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), + false, false, false, + FirstLoad->getAlignment()); + + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), false, false, + FirstInChain->getAlignment()); + + // Replace one of the loads with the new load. + LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); + + // Remove the rest of the load chains. + for (unsigned i = 1; i < NumElem ; ++i) { + // Replace all chain users of the old load nodes with the chain of the new + // load node. + LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); + } + + // Replace the first store with the new store. + CombineTo(EarliestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + // Remove all Store nodes. 
+ if (StoreNodes[i].MemNode == EarliestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); + removeFromWorkList(St); + DAG.DeleteNode(St); + } + + return true; +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -7237,7 +7925,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); EVT SVT = Value.getOperand(0).getValueType(); - unsigned Align = TLI.getTargetData()-> + unsigned Align = TLI.getDataLayout()-> getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || @@ -7426,6 +8114,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->getAlignment()); } + // Only perform this optimization before the types are legal, because we + // don't want to perform this optimization on every DAGCombine invocation. + if (!LegalTypes && MergeConsecutiveStores(ST)) + return SDValue(N, 0); + return ReduceLoadOpStoreWidth(N); } @@ -7504,9 +8197,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because - // we may introduce new vector instructions which are not backed by TD patterns. - // For example on AVX, extracting elements from a wide vector without using - // extract_subvector. + // we may introduce new vector instructions which are not backed by TD + // patterns. For example on AVX, extracting elements from a wide vector + // without using extract_subvector. 
if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE && ConstEltNo && !LegalOperations) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); @@ -7625,7 +8318,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Check the resultant load doesn't need a higher alignment than the // original load. unsigned NewAlign = - TLI.getTargetData() + TLI.getDataLayout() ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) @@ -7690,15 +8383,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { +// Simplify (build_vec (ext )) to (bitcast (build_vec )) +SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may create bit-casts which + // will be type-legalized to complex code sequences. + // We perform this optimization only before the operation legalizer because we + // may introduce illegal operations. + if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes) + return SDValue(); + unsigned NumInScalars = N->getNumOperands(); DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); - // A vector built entirely of undefs is undef. - if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(VT); - // Check to see if this is a BUILD_VECTOR of a bunch of values // which come from any_extend or zero_extend nodes. If so, we can create // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR @@ -7741,64 +8440,141 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // In order to have valid types, all of the inputs must be extended from the // same source type and all of the inputs must be any or zero extend. // Scalar sizes must be a power of two. 
- EVT OutScalarTy = N->getValueType(0).getScalarType(); + EVT OutScalarTy = VT.getScalarType(); bool ValidTypes = SourceType != MVT::Other && isPowerOf2_32(OutScalarTy.getSizeInBits()) && isPowerOf2_32(SourceType.getSizeInBits()); - // We perform this optimization post type-legalization because - // the type-legalizer often scalarizes integer-promoted vectors. - // Performing this optimization before may create bit-casts which - // will be type-legalized to complex code sequences. - // We perform this optimization only before the operation legalizer because we - // may introduce illegal operations. // Create a new simpler BUILD_VECTOR sequence which other optimizations can // turn into a single shuffle instruction. - if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && - ValidTypes) { - bool isLE = TLI.isLittleEndian(); - unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); - assert(ElemRatio > 1 && "Invalid element size ratio"); - SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): - DAG.getConstant(0, SourceType); - - unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); - SmallVector<SDValue, 8> Ops(NewBVElems, Filler); - - // Populate the new build_vector - for (unsigned i=0; i < N->getNumOperands(); ++i) { - SDValue Cast = N->getOperand(i); - assert((Cast.getOpcode() == ISD::ANY_EXTEND || - Cast.getOpcode() == ISD::ZERO_EXTEND || - Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); - SDValue In; - if (Cast.getOpcode() == ISD::UNDEF) - In = DAG.getUNDEF(SourceType); - else - In = Cast->getOperand(0); - unsigned Index = isLE ? (i * ElemRatio) : - (i * ElemRatio + (ElemRatio - 1)); + if (!ValidTypes) + return SDValue(); + + bool isLE = TLI.isLittleEndian(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); + assert(ElemRatio > 1 && "Invalid element size ratio"); + SDValue Filler = AllAnyExt ? 
DAG.getUNDEF(SourceType): + DAG.getConstant(0, SourceType); + + unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); + SmallVector<SDValue, 8> Ops(NewBVElems, Filler); + + // Populate the new build_vector + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Cast = N->getOperand(i); + assert((Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); + SDValue In; + if (Cast.getOpcode() == ISD::UNDEF) + In = DAG.getUNDEF(SourceType); + else + In = Cast->getOperand(0); + unsigned Index = isLE ? (i * ElemRatio) : + (i * ElemRatio + (ElemRatio - 1)); + + assert(Index < Ops.size() && "Invalid index"); + Ops[Index] = In; + } + + // The type of the new BUILD_VECTOR node. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); + assert(VecVT.getSizeInBits() == VT.getSizeInBits() && + "Invalid vector size"); + // Check if the new vector type is legal. + if (!isTypeLegal(VecVT)) return SDValue(); + + // Make the new BUILD_VECTOR. + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size()); + + // The new BUILD_VECTOR node has the potential to be further optimized. + AddToWorkList(BV.getNode()); + // Bitcast to the desired type. + return DAG.getNode(ISD::BITCAST, dl, VT, BV); +} + +SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { + EVT VT = N->getValueType(0); + + unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + + EVT SrcVT = MVT::Other; + unsigned Opcode = ISD::DELETED_NODE; + unsigned NumDefs = 0; - assert(Index < Ops.size() && "Invalid index"); - Ops[Index] = In; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + unsigned Opc = In.getOpcode(); + + if (Opc == ISD::UNDEF) + continue; + + // If all scalar values are floats and converted from integers. 
+ if (Opcode == ISD::DELETED_NODE && + (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) { + Opcode = Opc; + // If not supported by target, bail out. + if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal && + TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom) + return SDValue(); } + if (Opc != Opcode) + return SDValue(); - // The type of the new BUILD_VECTOR node. - EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); - assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && - "Invalid vector size"); - // Check if the new vector type is legal. - if (!isTypeLegal(VecVT)) return SDValue(); + EVT InVT = In.getOperand(0).getValueType(); - // Make the new BUILD_VECTOR. - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - VecVT, &Ops[0], Ops.size()); + // If all scalar values are typed differently, bail out. It's chosen to + // simplify BUILD_VECTOR of integer types. + if (SrcVT == MVT::Other) + SrcVT = InVT; + if (SrcVT != InVT) + return SDValue(); + NumDefs++; + } + + // If the vector has just one element defined, it's not worth to fold it into + // a vectorized one. + if (NumDefs < 2) + return SDValue(); - // The new BUILD_VECTOR node has the potential to be further optimized. - AddToWorkList(BV.getNode()); - // Bitcast to the desired type. 
- return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); + assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) + && "Should only handle conversion from integer to float."); + assert(SrcVT != MVT::Other && "Cannot determine source type!"); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); + SmallVector<SDValue, 8> Opnds; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + + if (In.getOpcode() == ISD::UNDEF) + Opnds.push_back(DAG.getUNDEF(SrcVT)); + else + Opnds.push_back(In.getOperand(0)); } + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, + &Opnds[0], Opnds.size()); + AddToWorkList(BV.getNode()); + + return DAG.getNode(Opcode, dl, VT, BV); +} + +SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // A vector built entirely of undefs is undef. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(VT); + + SDValue V = reduceBuildVecExtToExtBuildVec(N); + if (V.getNode()) + return V; + + V = reduceBuildVecConvertToConvertBuildVec(N); + if (V.getNode()) + return V; // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from @@ -7876,15 +8652,22 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) return SDValue(); + // If the input vector type has a different base type to the output + // vector type, bail out. + if (VecIn1.getValueType().getVectorElementType() != + VT.getVectorElementType()) + return SDValue(); + // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, DAG.getUNDEF(VecIn1.getValueType())); } // If VecIn2 is unused then change it to undef. VecIn2 = VecIn2.getNode() ? 
VecIn2 : DAG.getUNDEF(VT); - // Check that we were able to transform all incoming values to the same type. + // Check that we were able to transform all incoming values to the same + // type. if (VecIn2.getValueType() != VecIn1.getValueType() || VecIn1.getValueType() != VT) return SDValue(); @@ -7897,7 +8680,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { SDValue Ops[2]; Ops[0] = VecIn1; Ops[1] = VecIn2; - return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); + return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]); } return SDValue(); @@ -7933,8 +8716,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); // Only handle cases where both indexes are constants with the same type. - ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); - ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); + ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); + ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); if (InsIdx && ExtIdx && InsIdx->getValueType(0).getSizeInBits() <= 64 && @@ -7951,6 +8734,21 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { } } + if (V->getOpcode() == ISD::CONCAT_VECTORS) { + // Combine: + // (extract_subvec (concat V1, V2, ...), i) + // Into: + // Vi if possible + // Only operand 0 is checked as 'concat' assumes all inputs of the same type. + if (V->getOperand(0).getValueType() != NVT) + return SDValue(); + unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned NumElems = NVT.getVectorNumElements(); + assert((Idx % NumElems) == 0 && + "IDX in concat is not a multiple of the result vector length."); + return V->getOperand(Idx / NumElems); + } + return SDValue(); } @@ -8266,6 +9064,44 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return SDValue(); } +/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG. 
+SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { + // After legalize, the target may be depending on adds and other + // binary ops to provide legal ways to construct constants or other + // things. Simplifying them may result in a loss of legality. + if (LegalOperations) return SDValue(); + + assert(N->getValueType(0).isVector() && + "SimplifyVUnaryOp only works on vectors!"); + + SDValue N0 = N->getOperand(0); + + if (N0.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + + // Operand is a BUILD_VECTOR node, see if we can constant fold it. + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { + SDValue Op = N0.getOperand(i); + if (Op.getOpcode() != ISD::UNDEF && + Op.getOpcode() != ISD::ConstantFP) + break; + EVT EltVT = Op.getValueType(); + SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op); + if (FoldOp.getOpcode() != ISD::UNDEF && + FoldOp.getOpcode() != ISD::ConstantFP) + break; + Ops.push_back(FoldOp); + AddToWorkList(FoldOp.getNode()); + } + + if (Ops.size() != N0.getNumOperands()) + return SDValue(); + + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + N0.getValueType(), &Ops[0], Ops.size()); +} + SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2){ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); @@ -8349,6 +9185,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) return false; + // The loads must not depend on one another. 
+ if (LLD->isPredecessorOf(RLD) || + RLD->isPredecessorOf(LLD)) + return false; Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), LLD->getBasePtr(), @@ -8468,7 +9308,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, const_cast<ConstantFP*>(TV->getConstantFPValue()) }; Type *FPTy = Elts[0]->getType(); - const TargetData &TD = *TLI.getTargetData(); + const DataLayout &TD = *TLI.getDataLayout(); // Create a ConstantArray of the two constants. Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); @@ -8583,34 +9423,38 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); // Get a SetCC of the condition - // FIXME: Should probably make sure that setcc is legal if we ever have a - // target where it isn't. - SDValue Temp, SCC; - // cast from setcc result type to select result type - if (LegalTypes) { - SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), - N0, N1, CC); - if (N2.getValueType().bitsLT(SCC.getValueType())) - Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType()); - else + // NOTE: Don't create a SETCC if it's not legal on this target. + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, + LegalTypes ? 
TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) { + SDValue Temp, SCC; + // cast from setcc result type to select result type + if (LegalTypes) { + SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC); + if (N2.getValueType().bitsLT(SCC.getValueType())) + Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), + N2.getValueType()); + else + Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + N2.getValueType(), SCC); + } else { + SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), N2.getValueType(), SCC); - } else { - SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), - N2.getValueType(), SCC); - } + } - AddToWorkList(SCC.getNode()); - AddToWorkList(Temp.getNode()); + AddToWorkList(SCC.getNode()); + AddToWorkList(Temp.getNode()); - if (N2C->getAPIntValue() == 1) - return Temp; + if (N2C->getAPIntValue() == 1) + return Temp; - // shl setcc result by log2 n2c - return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy(Temp.getValueType()))); + // shl setcc result by log2 n2c + return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy(Temp.getValueType()))); + } } // Check to see if this is the equivalent of setcc @@ -8729,7 +9573,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { // to alias with anything but itself. Provides base object and offset as // results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, - const GlobalValue *&GV, void *&CV) { + const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. 
Base = Ptr; Offset = 0; GV = 0; CV = 0; @@ -8754,8 +9598,8 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, // for ConstantSDNodes since the same constant pool entry may be represented // by multiple nodes with different offsets. if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { - CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal() - : (void *)C->getConstVal(); + CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal() + : (const void *)C->getConstVal(); Offset += C->getOffset(); return false; } @@ -8780,7 +9624,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, SDValue Base1, Base2; int64_t Offset1, Offset2; const GlobalValue *GV1, *GV2; - void *CV1, *CV2; + const void *CV1, *CV2; bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 683fac6..4854cf7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -53,7 +53,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" @@ -1059,7 +1059,7 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo, MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), TM(FuncInfo.MF->getTarget()), - TD(*TM.getTargetData()), + TD(*TM.getDataLayout()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), TRI(*TM.getRegisterInfo()), diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 3e18ea7..a418290 100644 --- 
a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -29,7 +29,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" @@ -80,9 +80,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { Type *Ty = AI->getAllocatedType(); - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. 
@@ -97,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, - MayNeedSP); + MayNeedSP, AI); } for (; BB != EB; ++BB) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 4488d27..a8381b2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -20,7 +20,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -55,7 +55,8 @@ unsigned InstrEmitter::CountResults(SDNode *Node) { /// /// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding /// the chain and glue. These operands may be implicit on the machine instr. -static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { +static unsigned countOperands(SDNode *Node, unsigned NumExpUses, + unsigned &NumImpUses) { unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; @@ -63,7 +64,8 @@ static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { --N; // Ignore chain if it exists. // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses. 
- for (unsigned I = N; I; --I) { + NumImpUses = N - NumExpUses; + for (unsigned I = N; I > NumExpUses; --I) { if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) @@ -312,8 +314,6 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); - assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) && - "Don't have operand info for this instruction!"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), @@ -390,10 +390,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = TM->getTargetData()->getPrefTypeAlignment(Type); + Align = TM->getDataLayout()->getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! 
- Align = TM->getTargetData()->getTypeAllocSize(Type); + Align = TM->getDataLayout()->getTypeAllocSize(Type); } } @@ -410,6 +410,7 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, ES->getTargetFlags())); } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), + BA->getOffset(), BA->getTargetFlags())); } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(), @@ -720,7 +721,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NumImpUses = 0; - unsigned NodeOperands = countOperands(Node, NumImpUses); + unsigned NodeOperands = + countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; @@ -870,6 +872,17 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: { + unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ? + TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; + + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1)); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) + .addFrameIndex(FI->getIndex()); + break; + } + case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) @@ -884,25 +897,30 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); - // Add the HasSideEffect and isAlignStack bits. + // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore + // bits. 
int64_t ExtraInfo = cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))-> getZExtValue(); MI->addOperand(MachineOperand::CreateImm(ExtraInfo)); + // Remember to operand index of the group flags. + SmallVector<unsigned, 8> GroupIdx; + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + GroupIdx.push_back(MI->getNumOperands()); MI->addOperand(MachineOperand::CreateImm(Flags)); ++i; // Skip the ID value. switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegDef: - for (; NumVals; --NumVals, ++i) { + for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast @@ -913,7 +931,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: - for (; NumVals; --NumVals, ++i) { + for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), @@ -928,9 +946,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. - for (; NumVals; --NumVals, ++i) + for (unsigned j = 0; j != NumVals; ++j, ++i) AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + + // Manually set isTied bits. 
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) { + unsigned DefGroup = 0; + if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) { + unsigned DefIdx = GroupIdx[DefGroup] + 1; + unsigned UseIdx = GroupIdx.back() + 1; + for (unsigned j = 0; j != NumVals; ++j) + MI->tieOperands(DefIdx + j, UseIdx + j); + } + } break; } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 908ebb9..abf40b7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -718,7 +718,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); @@ -824,7 +824,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. 
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } @@ -869,25 +869,24 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, RVal, RChain); - } - } - break; + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getDataLayout()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); + } + } + break; case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(RVal, DAG); - if (Res.getNode()) { - RVal = Res; - RChain = Res.getValue(1); - } - break; + SDValue Res = TLI.LowerOperation(RVal, DAG); + if (Res.getNode()) { + RVal = Res; + RChain = Res.getValue(1); + } + break; } case TargetLowering::Promote: { // Only promote a load of vector type to another. 
@@ -1060,7 +1059,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); + TLI.getDataLayout()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); @@ -1241,6 +1240,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::DEBUGTRAP: + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Expand) { + // replace ISD::DEBUGTRAP with ISD::TRAP + SDValue NewVal; + NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(), + Node->getOperand(0)); + ReplaceNode(Node, NewVal.getNode()); + LegalizeOp(NewVal.getNode()); + return; + } + break; + default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -1588,26 +1600,71 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, break; case TargetLowering::Expand: { ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; + ISD::CondCode InvCC = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); - case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETUGE: CC1 = 
ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break; - // FIXME: Implement more expansions. - } - - SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); - SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + case ISD::SETO: + assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) + == TargetLowering::Legal + && "If SETO is expanded, SETOEQ must be legal!"); + CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; + case ISD::SETUO: + assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) + == TargetLowering::Legal + && "If SETUO is expanded, SETUNE must be legal!"); + CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break; + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETUEQ: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: + // If we are floating point, assign and break, otherwise fall through. + if (!OpVT.isInteger()) { + // We can use the 4th bit to tell if we are the unordered + // or ordered version of the opcode. + CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO; + Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND; + CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10); + break; + } + // Fallthrough if we are unsigned integer. + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETLT: + case ISD::SETNE: + case ISD::SETEQ: + InvCC = ISD::getSetCCSwappedOperands(CCCode); + if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) { + // We only support using the inverted operation and not a + // different manner of supporting expanding these cases. 
+ llvm_unreachable("Don't know how to expand this condition!"); + } + LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC); + RHS = SDValue(); + CC = SDValue(); + return; + } + + SDValue SetCC1, SetCC2; + if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { + // If we aren't the ordered or unorder operation, + // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + } else { + // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2); + } LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); @@ -1626,7 +1683,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, DebugLoc dl) { // Create the stack frame object. unsigned SrcAlign = - TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType(). + TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType(). getTypeForEVT(*DAG.getContext())); SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); @@ -1638,7 +1695,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); - unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType); + unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. @@ -2042,7 +2099,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, DebugLoc dl) { - if (Op0.getValueType() == MVT::i32) { + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion // Get the stack frame index of a 8 byte buffer. 
@@ -2787,7 +2844,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, - DAG.getConstant(TLI.getTargetData()-> + DAG.getConstant(TLI.getDataLayout()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer @@ -3109,6 +3166,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(1); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || (isDivRemLibcallAvailable(Node, isSigned, TLI) && + // If div is legal, it's better to do the normal expansion + !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && useDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { @@ -3366,7 +3425,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT PTy = TLI.getPointerTy(); - const TargetData &TD = *TLI.getTargetData(); + const DataLayout &TD = *TLI.getDataLayout(); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e393896..92dc5a9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1245,32 +1245,30 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); - if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType()) - == TargetLowering::Custom) - Res = TLI.LowerOperation(SDValue(N, 0), DAG); - - if (Res.getNode() == 0) { - switch (N->getOpcode()) { - default: - #ifndef NDEBUG - dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; - N->dump(&DAG); dbgs() << 
"\n"; - #endif - llvm_unreachable("Do not know how to expand this operator's operand!"); - - case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; - case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; - case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; - - case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; - case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; - case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; - case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; - case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; - case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; - case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), - OpNo); break; - } + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand this operator's operand!"); + + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + + case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; + case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), + OpNo); break; } // If the result is null, the sub-method took care of registering results etc. 
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index e8e968a..a370fae 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -644,8 +644,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { EVT SmallVT = LHS.getValueType(); // To determine if the result overflowed in a larger type, we extend the - // input to the larger type, do the multiply, then check the high bits of - // the result to see if the overflow happened. + // input to the larger type, do the multiply (checking if it overflows), + // then also check the high bits of the result to see if overflow happened + // there. if (N->getOpcode() == ISD::SMULO) { LHS = SExtPromotedInteger(LHS); RHS = SExtPromotedInteger(RHS); @@ -653,24 +654,31 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { LHS = ZExtPromotedInteger(LHS); RHS = ZExtPromotedInteger(RHS); } - SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1)); + SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS); - // Overflow occurred iff the high part of the result does not - // zero/sign-extend the low part. + // Overflow occurred if it occurred in the larger type, or if the high part + // of the result does not zero/sign-extend the low part. Check this second + // possibility first. SDValue Overflow; if (N->getOpcode() == ISD::UMULO) { - // Unsigned overflow occurred iff the high part is non-zero. + // Unsigned overflow occurred if the high part is non-zero. 
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, DAG.getIntPtrConstant(SmallVT.getSizeInBits())); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, DAG.getConstant(0, Hi.getValueType()), ISD::SETNE); } else { - // Signed overflow occurred iff the high part does not sign extend the low. + // Signed overflow occurred if the high part does not sign extend the low. SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), Mul, DAG.getValueType(SmallVT)); Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE); } + // The only other way for overflow to occur is if the multiplication in the + // larger type itself overflowed. + Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow, + SDValue(Mul.getNode(), 1)); + // Use the calculated overflow everywhere. ReplaceValueWith(SDValue(N, 1), Overflow); return Mul; @@ -2253,32 +2261,35 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); - EVT PtrVT = TLI.getPointerTy(); - Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); DebugLoc dl = N->getDebugLoc(); // A divide for UMULO should be faster than a function call. if (N->getOpcode() == ISD::UMULO) { SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); - DebugLoc DL = N->getDebugLoc(); - SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); SplitInteger(MUL, Lo, Hi); // A divide for UMULO will be faster than a function call. Select to // make sure we aren't using 0. 
SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), - RHS, DAG.getConstant(0, VT), ISD::SETNE); + RHS, DAG.getConstant(0, VT), ISD::SETEQ); SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, DAG.getConstant(1, VT), RHS); - SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero); - SDValue Overflow; - Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE); + SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); + SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, + ISD::SETNE); + Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero, + DAG.getConstant(0, N->getValueType(1)), + Overflow); ReplaceValueWith(SDValue(N, 1), Overflow); return; } + Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); + EVT PtrVT = TLI.getPointerTy(); + Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); + // Replace this with a libcall that will check overflow. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i32) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 39337ff..644e36e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -15,7 +15,7 @@ #include "LegalizeTypes.h" #include "llvm/CallingConv.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 94fc976..20b7ce6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -625,6 +625,7 @@ private: SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); SDValue WidenVecRes_VSETCC(SDNode* N); + SDValue WidenVecRes_Ternary(SDNode *N); SDValue 
WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); @@ -633,7 +634,7 @@ private: SDValue WidenVecRes_InregOp(SDNode *N); // Widen Vector Operand. - bool WidenVectorOperand(SDNode *N, unsigned ResNo); + bool WidenVectorOperand(SDNode *N, unsigned OpNo); SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 06f6bd6..6bcb3b2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -20,7 +20,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -94,14 +94,48 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { if (InVT.isVector() && OutVT.isInteger()) { // Handle cases like i64 = BITCAST v1i64 on x86, where the operand // is legal but the result is not. - EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2); + unsigned NumElems = 2; + EVT ElemVT = NOutVT; + EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + + // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>. + while (!isTypeLegal(NVT)) { + unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2; + // If the element size is smaller than byte, bail. 
+ if (NewSizeInBits < 8) + break; + NumElems *= 2; + ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits); + NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + } if (isTypeLegal(NVT)) { SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp); - Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, - DAG.getIntPtrConstant(1)); + + SmallVector<SDValue, 8> Vals; + for (unsigned i = 0; i < NumElems; ++i) + Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, + CastInOp, DAG.getIntPtrConstant(i))); + + // Build Lo, Hi pair by pairing extracted elements if needed. + unsigned Slot = 0; + for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) { + // Each iteration will BUILD_PAIR two nodes and append the result until + // there are only two nodes left, i.e. Lo and Hi. + SDValue LHS = Vals[Slot]; + SDValue RHS = Vals[Slot + 1]; + + if (TLI.isBigEndian()) + std::swap(LHS, RHS); + + Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, + EVT::getIntegerVT( + *DAG.getContext(), + LHS.getValueType().getSizeInBits() << 1), + LHS, RHS)); + } + Lo = Vals[Slot++]; + Hi = Vals[Slot++]; if (TLI.isBigEndian()) std::swap(Lo, Hi); @@ -116,7 +150,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(NOutVT. + TLI.getDataLayout()->getPrefTypeAlignment(NOutVT. 
getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 704f99b..22f8d51 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -64,6 +64,7 @@ class VectorLegalizer { // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. SDValue ExpandVSELECT(SDValue Op); + SDValue ExpandSELECT(SDValue Op); SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); @@ -220,6 +221,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::FMA: case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; @@ -260,6 +262,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case TargetLowering::Expand: if (Node->getOpcode() == ISD::VSELECT) Result = ExpandVSELECT(Op); + else if (Node->getOpcode() == ISD::SELECT) + Result = ExpandSELECT(Op); else if (Node->getOpcode() == ISD::UINT_TO_FP) Result = ExpandUINT_TO_FLOAT(Op); else if (Node->getOpcode() == ISD::FNEG) @@ -435,6 +439,66 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { return TF; } +SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { + // Lower a select instruction where the condition is a scalar and the + // operands are vectors. Lower this select to VSELECT and implement it + // using XOR AND OR. The selector bit is broadcasted. 
+ EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + SDValue Mask = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + + assert(VT.isVector() && !Mask.getValueType().isVector() + && Op1.getValueType() == Op2.getValueType() && "Invalid type"); + + unsigned NumElem = VT.getVectorNumElements(); + + // If we can't even use the basic vector operations of + // AND,OR,XOR, we will have to scalarize the op. + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + // Also, we need to be able to construct a splat vector using BUILD_VECTOR. + if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + // Generate a mask operand. + EVT MaskTy = TLI.getSetCCResultType(VT); + assert(MaskTy.isVector() && "Invalid CC type"); + assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits() + && "Invalid mask size"); + + // What is the size of each element in the vector mask. + EVT BitTy = MaskTy.getScalarType(); + + Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask, + DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), + DAG.getConstant(0, BitTy)); + + // Broadcast the mask so that the entire vector is all-one or all zero. + SmallVector<SDValue, 8> Ops(NumElem, Mask); + Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size()); + + // Bitcast the operands to be the same type as the mask. + // This is needed when we select between FP types because + // the mask is a vector of integers. 
+ Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); + Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); + + SDValue AllOnes = DAG.getConstant( + APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy); + SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); + + Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); + Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); + SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -449,12 +513,17 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // AND,OR,XOR, we will have to scalarize the op. // Notice that the operation may be 'promoted' which means that it is // 'bitcasted' to another type which is handled. + // This operation also isn't safe with AND, OR, XOR when the boolean + // type is 0/1 as we need an all ones vector constant to mask with. + // FIXME: Sign extend 1 to all ones if thats legal on the target. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getBooleanContents(true) != + TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); - assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits() + assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits() && "Invalid mask size"); // Bitcast the operands to be the same type as the mask. 
// This is needed when we select between FP types because diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4709202..d51a6eb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -21,7 +21,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -749,7 +749,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(VecType); + TLI.getDataLayout()->getPrefTypeAlignment(VecType); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, false, false, 0); @@ -1366,6 +1366,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FTRUNC: Res = WidenVecRes_Unary(N); break; + case ISD::FMA: + Res = WidenVecRes_Ternary(N); + break; } // If Res is null, the sub-method took care of registering the result. @@ -1373,6 +1376,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SetWidenedVector(SDValue(N, ResNo), Res); } +SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { + // Ternary op widening. 
+ DebugLoc dl = N->getDebugLoc(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = GetWidenedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); +} + SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. unsigned Opcode = N->getOpcode(); @@ -2069,16 +2082,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { //===----------------------------------------------------------------------===// // Widen Vector Operand //===----------------------------------------------------------------------===// -bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Widen node operand " << ResNo << ": "; +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); + // See if the target wants to custom widen this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + switch (N->getOpcode()) { default: #ifndef NDEBUG - dbgs() << "WidenVectorOperand op #" << ResNo << ": "; + dbgs() << "WidenVectorOperand op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h index f88b26d..d2269f8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h @@ -28,8 +28,8 @@ class SDNode; class SDNodeOrdering { DenseMap<const SDNode*, unsigned> OrderMap; - void operator=(const SDNodeOrdering&); // Do not implement. - SDNodeOrdering(const SDNodeOrdering&); // Do not implement. 
+ void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; + SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; public: SDNodeOrdering() {} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index b7ce48a..2ecdd89 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -13,11 +13,12 @@ #define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" +#include "InstrEmitter.h" #include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/SmallSet.h" @@ -34,6 +35,10 @@ STATISTIC(NumPRCopies, "Number of physical copies"); static RegisterScheduler fastDAGScheduler("fast", "Fast suboptimal list scheduling", createFastDAGScheduler); +static RegisterScheduler + linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling", + createDAGLinearizer); + namespace { /// FastPriorityQueue - A degenerate priority queue that considers @@ -331,7 +336,9 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } } if (isNewLoad) { - AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); + SDep D(LoadSU, SDep::Barrier); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); } ++NumUnfolds; @@ -407,9 +414,12 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { RemovePred(DelDeps[i].first, DelDeps[i].second); } - - AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); - AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + SDep FromDep(SU, SDep::Data, Reg); + FromDep.setLatency(SU->Latency); + AddPred(CopyFromSU, FromDep); + SDep 
ToDep(CopyFromSU, SDep::Data, 0); + ToDep.setLatency(CopyFromSU->Latency); + AddPred(CopyToSU, ToDep); Copies.push_back(CopyFromSU); Copies.push_back(CopyToSU); @@ -586,18 +596,14 @@ void ScheduleDAGFast::ListScheduleBottomUp() { InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); + AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); + AddPred(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; CurSU = NewDef; } @@ -629,6 +635,155 @@ void ScheduleDAGFast::ListScheduleBottomUp() { #endif } + +namespace { +//===----------------------------------------------------------------------===// +// ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the +// DAG in topological order. +// IMPORTANT: this may not work for targets with phyreg dependency. 
+// +class ScheduleDAGLinearize : public ScheduleDAGSDNodes { +public: + ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} + + void Schedule(); + + MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + +private: + std::vector<SDNode*> Sequence; + DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user + + void ScheduleNode(SDNode *N); +}; +} // end anonymous namespace + +void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { + if (N->getNodeId() != 0) + llvm_unreachable(0); + + if (!N->isMachineOpcode() && + (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))) + // These nodes do not need to be translated into MIs. + return; + + DEBUG(dbgs() << "\n*** Scheduling: "); + DEBUG(N->dump(DAG)); + Sequence.push_back(N); + + unsigned NumOps = N->getNumOperands(); + if (unsigned NumLeft = NumOps) { + SDNode *GluedOpN = 0; + do { + const SDValue &Op = N->getOperand(NumLeft-1); + SDNode *OpN = Op.getNode(); + + if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) { + // Schedule glue operand right above N. + GluedOpN = OpN; + assert(OpN->getNodeId() != 0 && "Glue operand not ready?"); + OpN->setNodeId(0); + ScheduleNode(OpN); + continue; + } + + if (OpN == GluedOpN) + // Glue operand is already scheduled. + continue; + + DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN); + if (DI != GluedMap.end() && DI->second != N) + // Users of glues are counted against the glued users. + OpN = DI->second; + + unsigned Degree = OpN->getNodeId(); + assert(Degree > 0 && "Predecessor over-released!"); + OpN->setNodeId(--Degree); + if (Degree == 0) + ScheduleNode(OpN); + } while (--NumLeft); + } +} + +/// findGluedUser - Find the representative use of a glue value by walking +/// the use chain. 
+static SDNode *findGluedUser(SDNode *N) { + while (SDNode *Glued = N->getGluedUser()) + N = Glued; + return N; +} + +void ScheduleDAGLinearize::Schedule() { + DEBUG(dbgs() << "********** DAG Linearization **********\n"); + + SmallVector<SDNode*, 8> Glues; + unsigned DAGSize = 0; + for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(), + E = DAG->allnodes_end(); I != E; ++I) { + SDNode *N = I; + + // Use node id to record degree. + unsigned Degree = N->use_size(); + N->setNodeId(Degree); + unsigned NumVals = N->getNumValues(); + if (NumVals && N->getValueType(NumVals-1) == MVT::Glue && + N->hasAnyUseOfValue(NumVals-1)) { + SDNode *User = findGluedUser(N); + if (User) { + Glues.push_back(N); + GluedMap.insert(std::make_pair(N, User)); + } + } + + if (N->isMachineOpcode() || + (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N))) + ++DAGSize; + } + + for (unsigned i = 0, e = Glues.size(); i != e; ++i) { + SDNode *Glue = Glues[i]; + SDNode *GUser = GluedMap[Glue]; + unsigned Degree = Glue->getNodeId(); + unsigned UDegree = GUser->getNodeId(); + + // Glue user must be scheduled together with the glue operand. So other + // users of the glue operand must be treated as its users. + SDNode *ImmGUser = Glue->getGluedUser(); + for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end(); + ui != ue; ++ui) + if (*ui == ImmGUser) + --Degree; + GUser->setNodeId(UDegree + Degree); + Glue->setNodeId(1); + } + + Sequence.reserve(DAGSize); + ScheduleNode(DAG->getRoot().getNode()); +} + +MachineBasicBlock* +ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap<SDValue, unsigned> VRBaseMap; + + DEBUG({ + dbgs() << "\n*** Final schedule ***\n"; + }); + + // FIXME: Handle dbg_values. 
+ unsigned NumNodes = Sequence.size(); + for (unsigned i = 0; i != NumNodes; ++i) { + SDNode *N = Sequence[NumNodes-i-1]; + DEBUG(N->dump(DAG)); + Emitter.EmitNode(N, false, false, VRBaseMap); + } + + DEBUG(dbgs() << '\n'); + + InsertPos = Emitter.getInsertPos(); + return Emitter.getBlock(); +} + //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// @@ -637,3 +792,8 @@ llvm::ScheduleDAGSDNodes * llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { return new ScheduleDAGFast(*IS->MF); } + +llvm::ScheduleDAGSDNodes * +llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGLinearize(*IS->MF); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index bf0a437..c554569 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -656,6 +656,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { break; case ISD::MERGE_VALUES: case ISD::TokenFactor: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: case ISD::CopyToReg: case ISD::CopyFromReg: case ISD::EH_LABEL: @@ -1056,7 +1058,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { // Add a data dependency to reflect that NewSU reads the value defined // by LoadSU. 
- AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); + SDep D(LoadSU, SDep::Data, 0); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); if (isNewLoad) AvailableQueue->addNode(LoadSU); @@ -1138,17 +1142,18 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, // Avoid scheduling the def-side copy before other successors. Otherwise // we could introduce another physreg interference on the copy and // continue inserting copies indefinitely. - SDep D(CopyFromSU, SDep::Order, /*Latency=*/0, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true); - AddPred(SuccSU, D); + AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial)); } } for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); - AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); - AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + SDep FromDep(SU, SDep::Data, Reg); + FromDep.setLatency(SU->Latency); + AddPred(CopyFromSU, FromDep); + SDep ToDep(CopyFromSU, SDep::Data, 0); + ToDep.setLatency(CopyFromSU->Latency); + AddPred(CopyToSU, ToDep); AvailableQueue->updateNode(SU); AvailableQueue->addNode(CopyFromSU); @@ -1357,9 +1362,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (!BtSU->isPending) AvailableQueue->remove(BtSU); } - AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); + AddPred(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors has been unscheduled, then the current // node is no longer avaialable. 
Schedule a successor that's now @@ -1411,20 +1414,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); + AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); + AddPred(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; CurSU = NewDef; } @@ -1756,6 +1753,7 @@ public: return V; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump(ScheduleDAG *DAG) const { // Emulate pop() without clobbering NodeQueueIds. 
std::vector<SUnit*> DumpQueue = Queue; @@ -1766,6 +1764,7 @@ public: SU->dump(DAG); } } +#endif }; typedef RegReductionPriorityQueue<bu_ls_rr_sort> @@ -1893,6 +1892,7 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { //===----------------------------------------------------------------------===// void RegReductionPQBase::dumpRegPressure() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) { const TargetRegisterClass *RC = *I; @@ -1902,6 +1902,7 @@ void RegReductionPQBase::dumpRegPressure() const { DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] << '\n'); } +#endif } bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { @@ -2930,10 +2931,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { !scheduleDAG->IsReachable(SuccSU, SU)) { DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); - scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); + scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial)); } } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 748668c..a197fcb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -485,14 +485,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if(isChain && OpN->getOpcode() == ISD::TokenFactor) OpLatency = 0; - const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, - OpLatency, PhysReg); + SDep Dep = isChain ? 
SDep(OpSU, SDep::Barrier) + : SDep(OpSU, SDep::Data, PhysReg); + Dep.setLatency(OpLatency); if (!isChain && !UnitLatencies) { - computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); - ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); + computeOperandLatency(OpN, N, i, Dep); + ST.adjustSchedDependency(OpSU, SU, Dep); } - if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { + if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { // Multiple register uses are combined in the same SUnit. For example, // we could have a set of glued nodes with all their defs consumed by // another set of glued nodes. Register pressure tracking sees this as @@ -643,6 +644,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, } void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) if (!SU->getNode()) { dbgs() << "PHYS REG COPY\n"; return; @@ -659,8 +661,10 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { dbgs() << "\n"; GluedNodes.pop_back(); } +#endif } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ScheduleDAGSDNodes::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { if (SUnit *SU = Sequence[i]) @@ -669,6 +673,7 @@ void ScheduleDAGSDNodes::dumpSchedule() const { dbgs() << "**** NOOP ****\n"; } } +#endif #ifndef NDEBUG /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that @@ -827,8 +832,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { } SmallVector<SDNode *, 4> GluedNodes; - for (SDNode *N = SU->getNode()->getGluedNode(); N; - N = N->getGluedNode()) + for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) GluedNodes.push_back(N); while (!GluedNodes.empty()) { SDNode *N = GluedNodes.back(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 84e41fc..907356f 100644 --- 
a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -114,7 +114,8 @@ namespace llvm { /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock /// according to the order specified in Sequence. /// - MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + virtual MachineBasicBlock* + EmitSchedule(MachineBasicBlock::iterator &InsertPos); virtual void dumpNode(const SUnit *SU) const; @@ -158,6 +159,12 @@ namespace llvm { void InitNodeNumDefs(); }; + protected: + /// ForceUnitLatencies - Return true if all scheduling edges should be given + /// a latency value of one. The default is to return false; schedulers may + /// override this as needed. + virtual bool forceUnitLatencies() const { return false; } + private: /// ClusterNeighboringLoads - Cluster loads from "near" addresses into /// combined SUnits. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index c851291..30f03ac 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f4fe892..f000ce3 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -29,7 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" 
+#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetOptions.h" @@ -91,11 +91,6 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, const APFloat& Val) { assert(VT.isFloatingPoint() && "Can only convert between FP types"); - // PPC long double cannot be converted to any other type. - if (VT == MVT::ppcf128 || - &Val.getSemantics() == &APFloat::PPCDoubleDouble) - return false; - // convert modifies in place, so make a copy. APFloat Val2 = APFloat(Val); bool losesInfo; @@ -136,13 +131,11 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { // constants are. SDValue NotZero = N->getOperand(i); unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); - if (isa<ConstantSDNode>(NotZero)) { - if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() < - EltSize) + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) { + if (CN->getAPIntValue().countTrailingOnes() < EltSize) return false; - } else if (isa<ConstantFPSDNode>(NotZero)) { - if (cast<ConstantFPSDNode>(NotZero)->getValueAPF() - .bitcastToAPInt().countTrailingOnes() < EltSize) + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize) return false; } else return false; @@ -179,11 +172,11 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Do not accept build_vectors that aren't all constants or which have non-0 // elements. 
SDValue Zero = N->getOperand(i); - if (isa<ConstantSDNode>(Zero)) { - if (!cast<ConstantSDNode>(Zero)->isNullValue()) + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { + if (!CN->isNullValue()) return false; - } else if (isa<ConstantFPSDNode>(Zero)) { - if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero()) + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { + if (!CFPN->getValueAPF().isPosZero()) return false; } else return false; @@ -494,8 +487,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { } case ISD::TargetBlockAddress: case ISD::BlockAddress: { - ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress()); - ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags()); + const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N); + ID.AddPointer(BA->getBlockAddress()); + ID.AddInteger(BA->getOffset()); + ID.AddInteger(BA->getTargetFlags()); break; } } // end switch (N->getOpcode()) @@ -883,7 +878,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TLI.getTargetData()->getABITypeAlignment(Ty); + return TLI.getDataLayout()->getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... @@ -1097,10 +1092,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, "Cannot set target flags on target-independent globals"); // Truncate (with sign-extension) the offset value to the pointer size. 
- EVT PTy = TLI.getPointerTy(); - unsigned BitWidth = PTy.getSizeInBits(); + unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); if (BitWidth < 64) - Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); + Offset = SignExtend64(Offset, BitWidth); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); if (!GVar) { @@ -1174,7 +1168,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1201,7 +1195,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1471,6 +1465,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, + int64_t Offset, bool isTarget, unsigned char TargetFlags) { unsigned Opc = isTarget ? 
ISD::TargetBlockAddress : ISD::BlockAddress; @@ -1478,12 +1473,14 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); ID.AddPointer(BA); + ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags); + SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, + TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1542,7 +1539,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); unsigned StackAlign = - std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); return getFrameIndex(FrameIdx, TLI.getPointerTy()); @@ -1555,7 +1552,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const TargetData *TD = TLI.getTargetData(); + const DataLayout *TD = TLI.getDataLayout(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); @@ -1610,10 +1607,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, } if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) { if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) { - // No compile time operations on this type yet. 
- if (N1C->getValueType(0) == MVT::ppcf128) - return SDValue(); - APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); switch (Cond) { default: break; @@ -2445,8 +2438,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { - // No compile time operations on ppcf128. - if (VT == MVT::ppcf128) break; APFloat apf(APInt::getNullValue(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, @@ -2455,9 +2446,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } case ISD::BITCAST: if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(Val.bitsToFloat(), VT); + return getConstantFP(APFloat(Val), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(Val.bitsToDouble(), VT); + return getConstantFP(APFloat(Val), VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), VT); @@ -2475,61 +2466,59 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, // Constant fold unary operations with a floating point constant operand. 
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) { APFloat V = C->getValueAPF(); // make copy - if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) { - switch (Opcode) { - case ISD::FNEG: - V.changeSign(); + switch (Opcode) { + case ISD::FNEG: + V.changeSign(); + return getConstantFP(V, VT); + case ISD::FABS: + V.clearSign(); + return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - case ISD::FABS: - V.clearSign(); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - case ISD::FCEIL: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FTRUNC: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FFLOOR: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FP_EXTEND: { - bool ignored; - // This can return overflow, underflow, or inexact; we don't care. - // FIXME need to be more flexible about rounding mode. - (void)V.convert(*EVTToAPFloatSemantics(VT), - APFloat::rmNearestTiesToEven, &ignored); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - } - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: { - integerPart x[2]; - bool ignored; - assert(integerPartWidth >= 64); - // FIXME need to be more flexible about rounding mode. 
- APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), - Opcode==ISD::FP_TO_SINT, - APFloat::rmTowardZero, &ignored); - if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual - break; - APInt api(VT.getSizeInBits(), x); - return getConstant(api, VT); - } - case ISD::BITCAST: - if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) - return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); - else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) - return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; + } + case ISD::FP_EXTEND: { + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + integerPart x[2]; + bool ignored; + assert(integerPartWidth >= 64); + // FIXME need to be more flexible about rounding mode. 
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), + Opcode==ISD::FP_TO_SINT, + APFloat::rmTowardZero, &ignored); + if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual break; - } + APInt api(VT.getSizeInBits(), x); + return getConstant(api, VT); + } + case ISD::BITCAST: + if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); + else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; } } @@ -2817,6 +2806,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) if (CFP->getValueAPF().isZero()) return N1; + } else if (Opcode == ISD::FMUL) { + ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1); + SDValue V = N2; + + // If the first operand isn't the constant, try the second + if (!CFP) { + CFP = dyn_cast<ConstantFPSDNode>(N2); + V = N1; + } + + if (CFP) { + // 0*x --> 0 + if (CFP->isZero()) + return SDValue(CFP,0); + // 1*x --> x + if (CFP->isExactlyValue(1.0)) + return V; + } } } assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); @@ -2935,17 +2942,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // expanding large vector constants. if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { SDValue Elt = N1.getOperand(N2C->getZExtValue()); - EVT VEltTy = N1.getValueType().getVectorElementType(); - if (Elt.getValueType() != VEltTy) { + + if (VT != Elt.getValueType()) // If the vector element type is not legal, the BUILD_VECTOR operands - // are promoted and implicitly truncated. Make that explicit here. - Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt); - } - if (VT != VEltTy) { - // If the vector element type is not legal, the EXTRACT_VECTOR_ELT - // result is implicitly extended. 
- Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt); - } + // are promoted and implicitly truncated, and the result implicitly + // extended. Make that explicit here. + Elt = getAnyExtOrTrunc(Elt, DL, VT); + return Elt; } @@ -3036,7 +3039,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // Cannonicalize constant to RHS if commutative std::swap(N1CFP, N2CFP); std::swap(N1, N2); - } else if (N2CFP && VT != MVT::ppcf128) { + } else if (N2CFP) { APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); APFloat::opStatus s; switch (Opcode) { @@ -3435,7 +3438,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, DAG.getMachineFunction()); if (VT == MVT::Other) { - if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() || + if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() || TLI.allowsUnalignedMemoryAccesses(VT)) { VT = TLI.getPointerTy(); } else { @@ -3503,7 +3506,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = + MF.getFunction()->getFnAttributes(). + hasAttribute(Attributes::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3523,7 +3528,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. 
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3596,7 +3601,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getFnAttributes(). + hasAttribute(Attributes::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3612,7 +3618,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3674,7 +3680,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getFnAttributes(). + hasAttribute(Attributes::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3687,7 +3694,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. 
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3781,7 +3788,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); + Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3836,7 +3843,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); + Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3885,7 +3892,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, return Result; // Emit a library call. - Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); + Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -3923,17 +3930,21 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + SynchronizationScope SynchScope) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE. // For now, atomics are considered to be volatile always. 
// FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); @@ -3983,17 +3994,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // A monotonic store does not load; a release store "loads" in the sense - // that other stores cannot be sunk past it. + // An atomic store does not load. An atomic load does not store. // (An atomicrmw obviously both loads and stores.) - unsigned Flags = MachineMemOperand::MOStore; - if (Opcode != ISD::ATOMIC_STORE || Ordering > Monotonic) - Flags |= MachineMemOperand::MOLoad; - - // For now, atomics are considered to be volatile always. + // For now, atomics are considered to be volatile always, and they are + // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, @@ -4056,16 +4067,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // A monotonic load does not store; an acquire load "stores" in the sense - // that other loads cannot be hoisted past it. 
- unsigned Flags = MachineMemOperand::MOLoad; - if (Ordering > Monotonic) - Flags |= MachineMemOperand::MOStore; - - // For now, atomics are considered to be volatile always. + // An atomic store does not load. An atomic load does not store. + // (An atomicrmw obviously both loads and stores.) + // For now, atomics are considered to be volatile always, and they are + // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, @@ -4157,6 +4169,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || + Opcode == ISD::LIFETIME_START || + Opcode == ISD::LIFETIME_END || (Opcode <= INT_MAX && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); @@ -4226,7 +4240,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo, const MDNode *Ranges) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -4284,7 +4298,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), - MMO->isNonTemporal(), + MMO->isNonTemporal(), MMO->isInvariant())); 
ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; @@ -4303,7 +4317,7 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - bool isInvariant, unsigned Alignment, + bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -4332,7 +4346,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, "Load is already a indexed load!"); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), false, LD->getAlignment()); } @@ -4340,7 +4354,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -4365,7 +4379,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); @@ -4394,7 +4408,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that 
codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -4421,7 +4435,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); - assert(Chain.getValueType() == MVT::Other && + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (VT == SVT) return getStore(Chain, dl, Val, Ptr, MMO); @@ -6074,7 +6088,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI.getTargetData()); + TLI.getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ba5bd79..3fbf7c2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DebugInfo.h" @@ -43,7 +44,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -88,7 +89,7 @@ static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT); + EVT PartVT, EVT ValueVT, const Value *V); /// 
getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type @@ -98,9 +99,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT, + const Value *V, ISD::NodeType AssertOp = ISD::DELETED_NODE) { if (ValueVT.isVector()) - return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, + PartVT, ValueVT, V); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -124,9 +127,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, if (RoundParts > 2) { Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, - PartVT, HalfVT); + PartVT, HalfVT, V); Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, - RoundParts / 2, PartVT, HalfVT); + RoundParts / 2, PartVT, HalfVT, V); } else { Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); @@ -142,7 +145,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, DL, - Parts + RoundParts, OddParts, PartVT, OddVT); + Parts + RoundParts, OddParts, PartVT, OddVT, V); // Combine the round and odd parts. 
Lo = Val; @@ -171,7 +174,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); - Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); } } @@ -209,14 +212,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, llvm_unreachable("Unknown mismatch!"); } -/// getCopyFromParts - Create a value that contains the specified legal parts -/// combined into the value they represent. If the parts combine to a type -/// larger then ValueVT then AssertOp can be used to specify whether the extra -/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT -/// (ISD::AssertSext). +/// getCopyFromPartsVector - Create a value that contains the specified legal +/// parts combined into the value they represent. If the parts combine to a +/// type larger then ValueVT then AssertOp can be used to specify whether the +/// extra bits are known to be zero (ISD::AssertZext) or sign extended from +/// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT) { + EVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -242,7 +245,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, - PartVT, IntermediateVT); + PartVT, IntermediateVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate // operands from the parts. 
@@ -251,7 +254,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, - PartVT, IntermediateVT); + PartVT, IntermediateVT, V); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the @@ -299,8 +302,19 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // Handle cases such as i8 -> <1 x i1> - assert(ValueVT.getVectorNumElements() == 1 && - "Only trivial scalar-to-vector conversions should get here!"); + if (ValueVT.getVectorNumElements() != 1) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("non-trivial scalar-to-vector conversion"); + if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + report_fatal_error("Cannot handle scalar-to-vector conversion!"); + } if (ValueVT.getVectorNumElements() == 1 && ValueVT.getVectorElementType() != PartVT) { @@ -312,25 +326,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } - - - static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT); + EVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. 
static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT, + EVT PartVT, const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) - return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); @@ -382,7 +393,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - assert(PartVT == ValueVT && "Type conversion failed!"); + if (PartVT != ValueVT) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("scalar-to-vector conversion failed"); + if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + } + Parts[0] = Val; return; } @@ -397,7 +420,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, DAG.getIntPtrConstant(RoundBits)); - getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (TLI.isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. @@ -443,7 +466,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, /// value split into legal parts. 
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT) { + EVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -529,7 +552,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. @@ -537,13 +560,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); } } - - - namespace { /// RegsForValue - This struct represents the registers (physical or virtual) /// that a particular set of values is assigned, and the type information @@ -621,14 +641,15 @@ namespace { /// If the Flag pointer is NULL, no flag is used. SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag, + const Value *V = 0) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. 
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag, const Value *V) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index @@ -647,7 +668,8 @@ namespace { SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { + SDValue &Chain, SDValue *Flag, + const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. if (ValueVTs.empty()) return SDValue(); @@ -721,7 +743,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), - NumRegs, RegisterVT, ValueVT); + NumRegs, RegisterVT, ValueVT, V); Part += NumRegs; Parts.clear(); } @@ -736,7 +758,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { + SDValue &Chain, SDValue *Flag, + const Value *V) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Get the list of the values's legal parts. 
@@ -748,7 +771,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, EVT RegisterVT = RegVTs[Value]; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT); + &Parts[Part], NumParts, RegisterVT, V); Part += NumParts; } @@ -824,7 +847,8 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - TD = DAG.getTarget().getTargetData(); + TD = DAG.getTarget().getDataLayout(); + Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -992,7 +1016,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); resolveDanglingDebugInfo(V, N); return N; } @@ -1147,7 +1171,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); } llvm_unreachable("Can't get register for value!"); @@ -1203,9 +1227,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ISD::NodeType ExtendKind = ISD::ANY_EXTEND; const Function *F = I.getParent()->getParent(); - if (F->paramHasAttr(0, Attribute::SExt)) + if (F->getRetAttributes().hasAttribute(Attributes::SExt)) ExtendKind = ISD::SIGN_EXTEND; - else if (F->paramHasAttr(0, Attribute::ZExt)) + else if (F->getRetAttributes().hasAttribute(Attributes::ZExt)) ExtendKind = ISD::ZERO_EXTEND; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) @@ -1216,11 +1240,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { 
SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, ExtendKind); + &Parts[0], NumParts, PartVT, &I, ExtendKind); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (F->paramHasAttr(0, Attribute::InReg)) + if (F->getRetAttributes().hasAttribute(Attributes::InReg)) Flags.setInReg(); // Propagate extension type if any @@ -1231,7 +1255,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), - /*isfixed=*/true)); + /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } } @@ -1601,7 +1625,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, // Update successor info addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); - addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); + // TrueBB and FalseBB are always different unless the incoming IR is + // degenerate. This only happens when running llc on weird IR. + if (CB.TrueBB != CB.FalseBB) + addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1762,6 +1789,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, + uint32_t BranchWeightToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { @@ -1799,8 +1827,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - addSuccessorWithWeight(SwitchBB, B.TargetBB); - addSuccessorWithWeight(SwitchBB, NextMBB); + // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. 
+ addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); + // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. + addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1923,6 +1953,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: @@ -1956,8 +1987,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, ISD::SETEQ); // Update successor info. - addSuccessorWithWeight(SwitchBB, Small.BB); - addSuccessorWithWeight(SwitchBB, Default); + // Both Small and Big will jump to Small.BB, so we sum up the weights. + addSuccessorWithWeight(SwitchBB, Small.BB, + Small.ExtraWeight + Big.ExtraWeight); + addSuccessorWithWeight(SwitchBB, Default, + // The default destination is the first successor in IR. + BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); // Insert the true branch. SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, @@ -1975,14 +2010,13 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } // Order cases by weight so the most likely case will be checked first. 
- BranchProbabilityInfo *BPI = FuncInfo.BPI; + uint32_t UnhandledWeights = 0; if (BPI) { for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { - uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), - I->BB->getBasicBlock()); + uint32_t IWeight = I->ExtraWeight; + UnhandledWeights += IWeight; for (CaseItr J = CR.Range.first; J < I; ++J) { - uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), - J->BB->getBasicBlock()); + uint32_t JWeight = J->ExtraWeight; if (IWeight > JWeight) std::swap(*I, *J); } @@ -2031,10 +2065,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, LHS = I->Low; MHS = SV; RHS = I->High; } - uint32_t ExtraWeight = I->ExtraWeight; + // The false weight should be sum of all un-handled cases. + UnhandledWeights -= I->ExtraWeight; CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, /* me */ CurBlock, - /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2); + /* trueweight */ I->ExtraWeight, + /* falseweight */ UnhandledWeights); // If emitting the first comparison, just call visitSwitchCase to emit the // code into the current block. Otherwise, push the CaseBlock onto the @@ -2079,7 +2115,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(4)) + if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2134,13 +2170,28 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } } + // Calculate weight for each unique destination in CR. 
+ DenseMap<MachineBasicBlock*, uint32_t> DestWeights; + if (FuncInfo.BPI) + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { + DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = + DestWeights.find(I->BB); + if (Itr != DestWeights.end()) + Itr->second += I->ExtraWeight; + else + DestWeights[I->BB] = I->ExtraWeight; + } + // Update successor info. Add one edge to each unique successor. BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - addSuccessorWithWeight(JumpTableBB, *I); + DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = + DestWeights.find(*I); + addSuccessorWithWeight(JumpTableBB, *I, + Itr != DestWeights.end() ? Itr->second : 0); } } @@ -2371,7 +2422,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, if (i == count) { assert((count < 3) && "Too much destinations to test!"); - CasesBits.push_back(CaseBits(0, Dest, 0)); + CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); count++; } @@ -2380,6 +2431,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, uint64_t lo = (lowValue - lowBound).getZExtValue(); uint64_t hi = (highValue - lowBound).getZExtValue(); + CasesBits[i].ExtraWeight += I->ExtraWeight; for (uint64_t j = lo; j <= hi; j++) { CasesBits[i].Mask |= 1ULL << j; @@ -2407,7 +2459,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CurMF->insert(BBI, CaseBB); BTC.push_back(BitTestCase(CasesBits[i].Mask, CaseBB, - CasesBits[i].BB)); + CasesBits[i].BB, CasesBits[i].ExtraWeight)); // Put SV in a virtual register to make it available from the new blocks. 
ExportFromCurrentBlock(SV); @@ -2435,30 +2487,25 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, Clusterifier TheClusterifier; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB); + TheClusterifier.add(i.getCaseValueEx(), SMBB, + BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); } TheClusterifier.optimize(); - BranchProbabilityInfo *BPI = FuncInfo.BPI; size_t numCmps = 0; for (Clusterifier::RangeIterator i = TheClusterifier.begin(), e = TheClusterifier.end(); i != e; ++i, ++numCmps) { Clusterifier::Cluster &C = *i; - unsigned W = 0; - if (BPI) { - W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock()); - if (!W) - W = 16; - W *= C.first.Weight; - BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W); - } + // Update edge weight for the cluster. + unsigned W = C.first.Weight; // FIXME: Currently work with ConstantInt based numbers. // Changing it to APInt based is a pretty heavy for this commit. @@ -2540,9 +2587,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) continue; - // If the switch has more than 5 blocks, and at least 40% dense, and the + // If the switch has more than N blocks, and is at least 40% dense, and the // target supports indirect branches, then emit a jump table rather than // lowering the switch to a binary tree of conditional branches. + // N defaults to 4 and is controlled via TLI.getMinimumJumpTableEntries(). 
if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) continue; @@ -2556,14 +2604,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors. - SmallVector<BasicBlock*, 32> succs; - succs.reserve(I.getNumSuccessors()); - for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) - succs.push_back(I.getSuccessor(i)); - array_pod_sort(succs.begin(), succs.end()); - succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) { - MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]]; + SmallSet<BasicBlock*, 32> Done; + for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { + BasicBlock *BB = I.getSuccessor(i); + bool Inserted = Done.insert(BB); + if (!Inserted) + continue; + + MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; addSuccessorWithWeight(IndirectBrMBB, Succ); } @@ -3160,9 +3208,9 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { return; // getValue will auto-populate this. 
Type *Ty = I.getAllocatedType(); - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); @@ -3460,7 +3508,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getType()); + EVT VT = TLI.getValueType(I.getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3490,7 +3538,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getValueOperand()->getType()); + EVT VT = TLI.getValueType(I.getValueOperand()->getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic store"); @@ -4352,7 +4400,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->hasFnAttr(Attribute::OptimizeForSize) || + if (!F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize) || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. 
CountPopulation_32(Val)+Log2_32(Val) < 7) { @@ -4850,7 +4898,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, MVT::i32)); + DAG.getIntPtrConstant(Idx)); + setValue(&I, Res); + return 0; + } + case Intrinsic::x86_avx_vextractf128_pd_256: + case Intrinsic::x86_avx_vextractf128_ps_256: + case Intrinsic::x86_avx_vextractf128_si_256: + case Intrinsic::x86_avx2_vextracti128: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * + DestVT.getVectorNumElements(); + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + DAG.getIntPtrConstant(Idx)); setValue(&I, Res); return 0; } @@ -5113,10 +5175,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } + case Intrinsic::debugtrap: case Intrinsic::trap: { StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { - DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 
+ ISD::TRAP : ISD::DEBUGTRAP; + DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot())); return 0; } TargetLowering::ArgListTy Args; @@ -5131,10 +5196,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Result.second); return 0; } - case Intrinsic::debugtrap: { - DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl,MVT::Other, getRoot())); - return 0; - } + case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::usub_with_overflow: @@ -5177,14 +5239,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { rw==1)); /* write */ return 0; } + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: { + bool IsStart = (Intrinsic == Intrinsic::lifetime_start); + // Stack coloring is not enabled in O0, discard region information. + if (TM.getOptLevel() == CodeGenOpt::None) + return 0; + + SmallVector<Value *, 4> Allocas; + GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); + + for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(), + E = Allocas.end(); Object != E; ++Object) { + AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); + // Could not find an Alloca. + if (!LifetimeObject) + continue; + + int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; + + SDValue Ops[2]; + Ops[0] = getRoot(); + Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); + + Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); + DAG.setRoot(Res); + } + } case Intrinsic::invariant_start: - case Intrinsic::lifetime_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); return 0; case Intrinsic::invariant_end: - case Intrinsic::lifetime_end: // Discard region information. 
return 0; case Intrinsic::donothing: @@ -5220,9 +5308,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, int DemoteStackIdx = -100; if (!CanLowerReturn) { - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize( FTy->getReturnType()); - unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( + unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); @@ -5254,12 +5342,12 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.isSExt = CS.paramHasAttr(attrInd, Attributes::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attributes::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attributes::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attributes::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attributes::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attributes::ByVal); Entry.Alignment = CS.getParamAlignment(attrInd); Args.push_back(Entry); } @@ -5687,7 +5775,7 @@ public: /// MVT::Other. 
EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, - const TargetData *TD) const { + const DataLayout *TD) const { if (CallOperandVal == 0) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) @@ -5991,8 +6079,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); - unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); @@ -6040,12 +6128,36 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); - // Remember the HasSideEffect and AlignStack bits as operand 3. + // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore + // bits as operand 3. unsigned ExtraInfo = 0; if (IA->hasSideEffects()) ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + // Set the asm dialect. + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; + + // Determine if this InlineAsm MayLoad or MayStore based on the constraints. + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(OpInfo, SDValue()); + + // Ideally, we would only check against memory constraints. However, the + // meaning of an other constraint can be target-specific and we can't easily + // reason about it. 
Therefore, be conservative and set MayLoad/MayStore + // for other constraints as well. + if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.Type == InlineAsm::isInput) + ExtraInfo |= InlineAsm::Extra_MayLoad; + else if (OpInfo.Type == InlineAsm::isOutput) + ExtraInfo |= InlineAsm::Extra_MayStore; + } + } + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, TLI.getPointerTy())); @@ -6155,7 +6267,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), DAG, AsmNodeOperands); @@ -6237,7 +6349,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, DAG, AsmNodeOperands); @@ -6268,7 +6380,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); // FIXME: Why don't we do this for inline asms with MRVs? 
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { @@ -6308,7 +6420,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, IA); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -6338,7 +6450,7 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { - const TargetData &TD = *TLI.getTargetData(); + const DataLayout &TD = *TLI.getDataLayout(); SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), @@ -6384,7 +6496,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = - getTargetData()->getABITypeAlignment(ArgTy); + getDataLayout()->getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); @@ -6398,7 +6510,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setByVal(); PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy)); + Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. unsigned FrameAlign; @@ -6423,12 +6535,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, - PartVT, ExtendKind); + PartVT, CLI.CS ? 
CLI.CS->getInstruction() : 0, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), - i < CLI.NumFixedArgs); + i < CLI.NumFixedArgs, + i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) @@ -6504,7 +6617,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, + NumRegs, RegisterVT, VT, NULL, AssertOp)); CurReg += NumRegs; } @@ -6543,7 +6656,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); + RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V); PendingExports.push_back(Chain); } @@ -6573,7 +6686,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { const Function &F = *LLVMBB->getParent(); SelectionDAG &DAG = SDB->DAG; DebugLoc dl = SDB->getCurDebugLoc(); - const TargetData *TD = TLI.getTargetData(); + const DataLayout *TD = TLI.getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; // Check whether the function can return without sret-demotion. 
@@ -6591,7 +6704,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { ISD::ArgFlagsTy Flags; Flags.setSRet(); EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); - ISD::InputArg RetArg(Flags, RegisterVT, true); + ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0); Ins.push_back(RetArg); } @@ -6610,15 +6723,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { unsigned OriginalAlignment = TD->getABITypeAlignment(ArgTy); - if (F.paramHasAttr(Idx, Attribute::ZExt)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt)) Flags.setZExt(); - if (F.paramHasAttr(Idx, Attribute::SExt)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt)) Flags.setSExt(); - if (F.paramHasAttr(Idx, Attribute::InReg)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::InReg)) Flags.setInReg(); - if (F.paramHasAttr(Idx, Attribute::StructRet)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::StructRet)) Flags.setSRet(); - if (F.paramHasAttr(Idx, Attribute::ByVal)) { + if (F.getParamAttributes(Idx).hasAttribute(Attributes::ByVal)) { Flags.setByVal(); PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); @@ -6632,14 +6745,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { FrameAlign = TLI.getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); } - if (F.paramHasAttr(Idx, Attribute::Nest)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::Nest)) Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed); + ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed, + Idx-1, i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -6685,7 
+6799,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, - RegVT, VT, AssertOp); + RegVT, VT, NULL, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); @@ -6719,14 +6833,14 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { if (!I->use_empty()) { ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (F.paramHasAttr(Idx, Attribute::SExt)) + if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt)) AssertOp = ISD::AssertSext; - else if (F.paramHasAttr(Idx, Attribute::ZExt)) + else if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt)) AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, - AssertOp)); + NULL, AssertOp)); } i += NumParts; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 4090002..9e46d96 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -66,7 +66,7 @@ class ShuffleVectorInst; class SIToFPInst; class StoreInst; class SwitchInst; -class TargetData; +class DataLayout; class TargetLibraryInfo; class TargetLowering; class TruncInst; @@ -150,9 +150,11 @@ private: uint64_t Mask; MachineBasicBlock* BB; unsigned Bits; + uint32_t ExtraWeight; - CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits): - Mask(mask), BB(bb), Bits(bits) { } + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, + uint32_t Weight): + Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } }; typedef std::vector<Case> CaseVector; @@ -247,11 +249,13 @@ private: typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock; struct BitTestCase { - BitTestCase(uint64_t M, 
MachineBasicBlock* T, MachineBasicBlock* Tr): - Mask(M), ThisBB(T), TargetBB(Tr) { } + BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, + uint32_t Weight): + Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { } uint64_t Mask; MachineBasicBlock *ThisBB; MachineBasicBlock *TargetBB; + uint32_t ExtraWeight; }; typedef SmallVector<BitTestCase, 3> BitTestInfo; @@ -281,7 +285,7 @@ public: const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - const TargetData *TD; + const DataLayout *TD; AliasAnalysis *AA; const TargetLibraryInfo *LibInfo; @@ -325,7 +329,7 @@ public: CodeGenOpt::Level ol) : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), - HasTailCall(false), Context(dag.getContext()) { + HasTailCall(false) { } void init(GCFunctionInfo *gfi, AliasAnalysis &aa, @@ -452,6 +456,7 @@ public: void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, + uint32_t BranchWeightToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 13cd011..6f3ce7a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -267,6 +267,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STACKRESTORE: return "stackrestore"; case ISD::TRAP: return "trap"; case ISD::DEBUGTRAP: return "debugtrap"; + case ISD::LIFETIME_START: return "lifetime.start"; + case ISD::LIFETIME_END: return "lifetime.end"; // Bit manipulation case ISD::BSWAP: return "bswap"; @@ -331,7 +333,7 @@ void SDNode::dump(const SelectionDAG *G) const { } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (void*)this << ": "; + OS << (const void*)this << 
": "; for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ","; @@ -473,11 +475,16 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << "<" << *M->getMemOperand() << ">"; } else if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(this)) { + int64_t offset = BA->getOffset(); OS << "<"; WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); OS << ", "; WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); OS << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; if (unsigned int TF = BA->getTargetFlags()) OS << " [TF=" << TF << ']'; } @@ -559,7 +566,7 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, child->printr(OS, G); once.insert(child); } else { // Just the address. FIXME: also print the child's opcode. - OS << (void*)child; + OS << (const void*)child; if (unsigned RN = N->getOperand(i).getResNo()) OS << ":" << RN; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4e5e3ba..c314fa5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -474,6 +474,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MRI.replaceRegWith(From, To); } + // Freeze the set of reserved registers now that MachineFrameInfo has been + // set up. All the information required by getReservedRegs() should be + // available now. + MRI.freezeReservedRegs(*MF); + // Release function-specific state. SDB and CurDAG are already cleared // at this point. 
FuncInfo->clear(); @@ -554,7 +559,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getFunction()->getName().str() + ":" + + BlockName = MF->getName().str() + ":" + FuncInfo->MBB->getBasicBlock()->getName().str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber @@ -1209,7 +1214,12 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); } + uint32_t UnhandledWeight = 0; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) + UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { + UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight; // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); @@ -1217,12 +1227,14 @@ SelectionDAGISel::FinishBasicBlock() { if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i], SDB->BitTestCases[i].Cases[j+1].ThisBB, + UnhandledWeight, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); else SDB->visitBitTestCase(SDB->BitTestCases[i], SDB->BitTestCases[i].Default, + UnhandledWeight, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); @@ -1794,10 +1806,13 @@ WalkChainUsers(const SDNode *ChainedNode, User->getOpcode() == ISD::HANDLENODE) // Root of the graph. continue; - if (User->getOpcode() == ISD::CopyToReg || - User->getOpcode() == ISD::CopyFromReg || - User->getOpcode() == ISD::INLINEASM || - User->getOpcode() == ISD::EH_LABEL) { + unsigned UserOpcode = User->getOpcode(); + if (UserOpcode == ISD::CopyToReg || + UserOpcode == ISD::CopyFromReg || + UserOpcode == ISD::INLINEASM || + UserOpcode == ISD::EH_LABEL || + UserOpcode == ISD::LIFETIME_START || + UserOpcode == ISD::LIFETIME_END) { // If their node ID got reset to -1 then they've already been selected. 
// Treat them like a MachineOpcode. if (User->getNodeId() == -1) @@ -1994,7 +2009,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, return Res; } -/// CheckPatternPredicate - Implements OP_CheckPatternPredicate. +/// CheckSame - Implements OP_CheckSame. LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, @@ -2213,6 +2228,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::CopyFromReg: case ISD::CopyToReg: case ISD::EH_LABEL: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. return 0; case ISD::AssertSext: @@ -2981,7 +2998,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { N->printrFull(Msg, CurDAG); - Msg << "\nIn function: " << MF->getFunction()->getName(); + Msg << "\nIn function: " << MF->getName(); } else { bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; unsigned iid = diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 173ffac..3921635 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -14,7 +14,6 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/Constants.h" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -50,7 +49,7 @@ namespace llvm { template<typename EdgeIter> static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { - return itostr(I - SDNodeIterator::begin((SDNode *) Node)); + return itostr(I - SDNodeIterator::begin((const SDNode *) Node)); } /// edgeTargetsEdgeSource - This method returns true if this outgoing edge @@ -73,7 +72,7 @@ namespace llvm { } 
static std::string getGraphName(const SelectionDAG *G) { - return G->getMachineFunction().getFunction()->getName(); + return G->getMachineFunction().getName(); } static bool renderGraphFromBottomUp() { @@ -146,7 +145,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), + ViewGraph(this, "dag." + getMachineFunction().getName(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6820175..49f55e2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -14,7 +14,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -515,7 +515,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { /// NOTE: The constructor takes ownership of TLOF. TargetLowering::TargetLowering(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { + : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); @@ -583,8 +583,13 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); + // On most systems, DEBUGTRAP and TRAP have no difference. 
The "Expand" + // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. + // + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); + IsLittleEndian = TD->isLittleEndian(); - PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; @@ -613,6 +618,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, ShouldFoldAtomicFences = false; InsertFencesForAtomic = false; SupportJumpTables = true; + MinimumJumpTableEntries = 4; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); @@ -624,7 +630,7 @@ TargetLowering::~TargetLowering() { } MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8*TD->getPointerSize()); + return MVT::getIntegerVT(8*TD->getPointerSize(0)); } /// canOpTrap - Returns true if the operation can trap for the value type. 
@@ -772,7 +778,7 @@ void TargetLowering::computeRegisterProperties() { LegalIntReg = IntReg; } else { RegisterTypeForVT[IntReg] = TransformToType[IntReg] = - (MVT::SimpleValueType)LegalIntReg; + (const MVT::SimpleValueType)LegalIntReg; ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); } } @@ -898,10 +904,9 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return NULL; } - EVT TargetLowering::getSetCCResultType(EVT VT) const { assert(!VT.isVector() && "No default SetCC type for vectors!"); - return PointerTy.SimpleTy; + return getPointerTy(0).SimpleTy; } MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const { @@ -997,9 +1002,9 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, EVT VT = ValueVTs[j]; ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - if (attr & Attribute::SExt) + if (attr.hasAttribute(Attributes::SExt)) ExtendKind = ISD::SIGN_EXTEND; - else if (attr & Attribute::ZExt) + else if (attr.hasAttribute(Attributes::ZExt)) ExtendKind = ISD::ZERO_EXTEND; // FIXME: C calling convention requires the return type to be promoted to @@ -1017,18 +1022,17 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr & Attribute::InReg) + if (attr.hasAttribute(Attributes::InReg)) Flags.setInReg(); // Propagate extension type if any - if (attr & Attribute::SExt) + if (attr.hasAttribute(Attributes::SExt)) Flags.setSExt(); - else if (attr & Attribute::ZExt) + else if (attr.hasAttribute(Attributes::ZExt)) Flags.setZExt(); - for (unsigned i = 0; i < NumParts; ++i) { - Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); - } + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0)); } } @@ -1062,7 +1066,7 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || (JTEncoding == 
MachineJumpTableInfo::EK_GPRel32BlockAddress)) - return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0)); return Table; } @@ -2441,7 +2445,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. - uint64_t EqVal; + uint64_t EqVal = 0; switch (getBooleanContents(N0.getValueType().isVector())) { case UndefinedBooleanContent: case ZeroOrOneBooleanContent: @@ -2954,8 +2958,9 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true); break; } - } else if (dyn_cast<PointerType>(OpTy)) { - OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize()); + } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { + OpInfo.ConstraintVT = MVT::getIntegerVT( + 8*TD->getPointerSize(PT->getAddressSpace())); } else { OpInfo.ConstraintVT = EVT::getEVT(OpTy, true); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp index a081e3c..f769b44 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -16,7 +16,7 @@ using namespace llvm; TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM) - : TD(TM.getTargetData()) { + : TD(TM.getDataLayout()) { } TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp index 21ae2f5..4fbe1b3 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp @@ -159,7 +159,7 @@ void PEI::initShrinkWrappingInfo() { // via --shrink-wrap-func=<funcname>. 
#ifndef NDEBUG if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getName().str(); + std::string MFName = MF->getName().str(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif @@ -187,7 +187,7 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { DEBUG(if (ShrinkWrapThisFunction) { dbgs() << "Place CSR spills/restores for " - << MF->getFunction()->getName() << "\n"; + << MF->getName() << "\n"; }); if (calculateSets(Fn)) @@ -364,7 +364,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // If no CSRs used, we are done. if (CSI.empty()) { DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getName() << ": uses no callee-saved registers\n"); return false; } @@ -384,7 +384,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // implementation to functions with <= 500 MBBs. if (Fn.size() > 500) { DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getName() << ": too large (" << Fn.size() << " MBBs)\n"); ShrinkWrapThisFunction = false; } @@ -466,7 +466,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { } if (allCSRUsesInEntryBlock) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getName() << ": all CSRs used in EntryBlock\n"); ShrinkWrapThisFunction = false; } else { @@ -478,7 +478,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { allCSRsUsedInEntryFanout = false; } if (allCSRsUsedInEntryFanout) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getName() << ": all CSRs used in imm successors of EntryBlock\n"); ShrinkWrapThisFunction = false; } @@ -505,7 +505,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { if (dominatesExitNodes) { CSRUsedInChokePoints |= CSRUsed[MBB]; if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << 
"DISABLED: " << Fn.getName() << ": all CSRs used in choke point(s) at " << getBasicBlockName(MBB) << "\n"); ShrinkWrapThisFunction = false; @@ -521,7 +521,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { return false; DEBUG({ - dbgs() << "ENABLED: " << Fn.getFunction()->getName(); + dbgs() << "ENABLED: " << Fn.getName(); if (HasFastExitPath) dbgs() << " (fast exit path)"; dbgs() << "\n"; @@ -861,7 +861,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { dbgs() << "-----------------------------------------------------------\n"; dbgs() << "total iterations = " << iterations << " ( " - << Fn.getFunction()->getName() + << Fn.getName() << " " << numSRReducedThisFunc << " " << Fn.size() << " )\n"; @@ -984,7 +984,7 @@ void PEI::verifySpillRestorePlacement() { if (isReturnBlock(SBB) || SBB->succ_size() == 0) { if (restored != spilled) { CSRegSet notRestored = (spilled - restored); - DEBUG(dbgs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getName() << ": " << stringifyCSRegSet(notRestored) << " spilled at " << getBasicBlockName(MBB) << " are never restored on path to return " @@ -1032,7 +1032,7 @@ void PEI::verifySpillRestorePlacement() { } if (spilled != restored) { CSRegSet notSpilled = (restored - spilled); - DEBUG(dbgs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getName() << ": " << stringifyCSRegSet(notSpilled) << " restored at " << getBasicBlockName(MBB) << " are never spilled\n"); diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 980bd74..4b566fc 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -30,7 +30,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Scalar.h" 
#include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -191,58 +191,43 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. unsigned Align = - TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy); + TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); // Fill in the function context structure. - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - Value *Zero = ConstantInt::get(Int32Ty, 0); - Value *One = ConstantInt::get(Int32Ty, 1); - Value *Two = ConstantInt::get(Int32Ty, 2); - Value *Three = ConstantInt::get(Int32Ty, 3); - Value *Four = ConstantInt::get(Int32Ty, 4); - - Value *Idxs[2] = { Zero, 0 }; - for (unsigned I = 0, E = LPads.size(); I != E; ++I) { LandingPadInst *LPI = LPads[I]; IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); // Reference the __data field. - Idxs[1] = Two; - Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data"); + Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. 
- Idxs[1] = Zero; - Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep"); + Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0, + "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); - ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext())); + ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Idxs[1] = One; - Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep"); + Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1, + "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); } // Personality function - Idxs[1] = Three; + IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = - GetElementPtrInst::Create(FuncCtx, Idxs, "pers_fn_gep", - EntryBB->getTerminator()); - new StoreInst(PersonalityFn, PersonalityFieldPtr, true, - EntryBB->getTerminator()); + Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, + "pers_fn_gep"); + Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address - Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", - EntryBB->getTerminator()); - Idxs[1] = Four; - Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep", - EntryBB->getTerminator()); - new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); + Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); + Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep"); + Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true); return FuncCtx; } @@ -417,48 +402,31 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Value *FuncCtx = setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); - Type *Int32Ty = 
Type::getInt32Ty(F.getContext()); - - Value *Idxs[2] = { - ConstantInt::get(Int32Ty, 0), 0 - }; + IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. - Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *JBufPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "jbuf_gep", - EntryBB->getTerminator()); + Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep"); // Save the frame pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 0); - Value *FramePtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep", - EntryBB->getTerminator()); + Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep"); - Value *Val = CallInst::Create(FrameAddrFn, - ConstantInt::get(Int32Ty, 0), - "fp", - EntryBB->getTerminator()); - new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); + Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp"); + Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true); // Save the stack pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *StackPtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep", - EntryBB->getTerminator()); + Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep"); - Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); - new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); + Val = Builder.CreateCall(StackAddrFn, "sp"); + Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = CastInst::Create(Instruction::BitCast, JBufPtr, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "", EntryBB->getTerminator()); + Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy()); + Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg); // Store a pointer to the function context so that the back-end will know // where to look for it. 
- Value *FuncCtxArg = CastInst::Create(Instruction::BitCast, FuncCtx, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - CallInst::Create(FuncCtxFn, FuncCtxArg, "", EntryBB->getTerminator()); + Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy()); + Builder.CreateCall(FuncCtxFn, FuncCtxArg); // At this point, we are all set up, update the invoke instructions to mark // their call_site values. diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp index c8c3fb3..95faafab 100644 --- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -143,6 +143,7 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void SlotIndexes::dump() const { for (IndexList::const_iterator itr = indexList.begin(); itr != indexList.end(); ++itr) { @@ -159,6 +160,7 @@ void SlotIndexes::dump() const { dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';' << MBBRanges[i].second << ")\n"; } +#endif // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { @@ -168,9 +170,11 @@ void SlotIndex::print(raw_ostream &os) const { os << "invalid"; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) // Dump a SlotIndex to stderr. 
void SlotIndex::dump() const { print(dbgs()); dbgs() << "\n"; } +#endif diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 4a2b7ec..dca15ee 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -356,6 +356,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { Edit->anyRematerializable(0); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void SplitEditor::dump() const { if (RegAssign.empty()) { dbgs() << " empty\n"; @@ -366,6 +367,7 @@ void SplitEditor::dump() const { dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); dbgs() << '\n'; } +#endif VNInfo *SplitEditor::defValue(unsigned RegIdx, const VNInfo *ParentVNI, diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp new file mode 100644 index 0000000..1cbee84 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -0,0 +1,783 @@ +//===-- StackColoring.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements the stack-coloring optimization that looks for +// lifetime marker machine instructions (LIFETIME_START and LIFETIME_END), +// which represent the possible lifetime of stack slots. It attempts to +// merge disjoint stack slots and reduce the used stack space. +// NOTE: This pass is not StackSlotColoring, which optimizes spill slots. +// +// TODO: In the future we plan to improve stack coloring in the following ways: +// 1. Allow merging multiple small slots into a single larger slot at different +// offsets. +// 2. Merge this pass with StackSlotColoring and allow merging of allocas with +// spill slots.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "stackcoloring" +#include "MachineTraceMetrics.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/DebugInfo.h" +#include "llvm/Instructions.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> +DisableColoring("no-stack-coloring", + cl::init(false), cl::Hidden, + cl::desc("Disable stack coloring")); + +/// The user may write code that uses allocas outside of the declared lifetime +/// zone. This can happen when the user returns a reference to a local +/// data-structure. We can detect these cases and decide not to optimize the +/// code. If this flag is enabled, we try to save the user. 
+static cl::opt<bool> +ProtectFromEscapedAllocas("protect-from-escaped-allocas", + cl::init(false), cl::Hidden, + cl::desc("Do not optimize lifetime zones that are broken")); + +STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); +STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); +STATISTIC(StackSlotMerged, "Number of stack slot merged."); +STATISTIC(EscapedAllocas, + "Number of allocas that escaped the lifetime region"); + +//===----------------------------------------------------------------------===// +// StackColoring Pass +//===----------------------------------------------------------------------===// + +namespace { +/// StackColoring - A machine pass for merging disjoint stack allocations, +/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. +class StackColoring : public MachineFunctionPass { + MachineFrameInfo *MFI; + MachineFunction *MF; + + /// A class representing liveness information for a single basic block. + /// Each bit in the BitVector represents the liveness property + /// for a different stack slot. + struct BlockLifetimeInfo { + /// Which slots BEGIN in each basic block. + BitVector Begin; + /// Which slots END in each basic block. + BitVector End; + /// Which slots are marked as LIVE_IN, coming into each basic block. + BitVector LiveIn; + /// Which slots are marked as LIVE_OUT, coming out of each basic block. + BitVector LiveOut; + }; + + /// Maps active slots (per bit) for each basic block. + DenseMap<MachineBasicBlock*, BlockLifetimeInfo> BlockLiveness; + + /// Maps basic blocks to a serial number. + DenseMap<MachineBasicBlock*, int> BasicBlocks; + /// Maps serial numbers to basic blocks. + SmallVector<MachineBasicBlock*, 8> BasicBlockNumbering; + + /// Maps liveness intervals for each slot. + SmallVector<LiveInterval*, 16> Intervals; + /// VNInfo is used for the construction of LiveIntervals. + VNInfo::Allocator VNInfoAllocator; + /// SlotIndex analysis object.
+ SlotIndexes *Indexes; + + /// The list of lifetime markers found. These markers are to be removed + /// once the coloring is done. + SmallVector<MachineInstr*, 8> Markers; + + /// SlotSizeSorter - A Sort utility for arranging stack slots according + /// to their size. + struct SlotSizeSorter { + MachineFrameInfo *MFI; + SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { } + bool operator()(int LHS, int RHS) { + // We use -1 to denote an uninteresting slot. Place these slots at the end. + if (LHS == -1) return false; + if (RHS == -1) return true; + // Sort according to size. + return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); + } +}; + +public: + static char ID; + StackColoring() : MachineFunctionPass(ID) { + initializeStackColoringPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &MF); + +private: + /// Debug. + void dump(); + + /// Removes all of the lifetime marker instructions from the function. + /// \returns true if any markers were removed. + bool removeAllMarkers(); + + /// Scan the machine function and find all of the lifetime markers. + /// Record the findings in the BEGIN and END vectors. + /// \returns the number of markers found. + unsigned collectMarkers(unsigned NumSlot); + + /// Perform the dataflow calculation and calculate the lifetime for each of + /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and + /// LifetimeLIVE_OUT maps that represent which stack slots are live coming + /// in and out blocks. + void calculateLocalLiveness(); + + /// Construct the LiveIntervals for the slots. + void calculateLiveIntervals(unsigned NumSlots); + + /// Go over the machine function and change instructions which use stack + /// slots to use the joint slots. + void remapInstructions(DenseMap<int, int> &SlotRemap); + + /// The input program may contain instructions which are not inside lifetime + /// markers.
This can happen due to a bug in the compiler or due to a bug in + /// user code (for example, returning a reference to a local variable). + /// This procedure checks all of the instructions in the function and + /// invalidates lifetime ranges which do not contain all of the instructions + /// which access that frame slot. + void removeInvalidSlotRanges(); + + /// Map entries which point to other entries to their destination. + /// A->B->C becomes A->C. + void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots); +}; +} // end anonymous namespace + +char StackColoring::ID = 0; +char &llvm::StackColoringID = StackColoring::ID; + +INITIALIZE_PASS_BEGIN(StackColoring, + "stack-coloring", "Merge disjoint stack slots", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(StackColoring, + "stack-coloring", "Merge disjoint stack slots", false, false) + +void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void StackColoring::dump() { + for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); + FI != FE; ++FI) { + unsigned Num = BasicBlocks[*FI]; + DEBUG(dbgs()<<"Inspecting block #"<<Num<<" ["<<FI->getName()<<"]\n"); + Num = 0; + DEBUG(dbgs()<<"BEGIN : {"); + for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].Begin.test(i)<<" "); + DEBUG(dbgs()<<"}\n"); + + DEBUG(dbgs()<<"END : {"); + for (unsigned i=0; i < BlockLiveness[*FI].End.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].End.test(i)<<" "); + + DEBUG(dbgs()<<"}\n"); + + DEBUG(dbgs()<<"LIVE_IN: {"); + for (unsigned i=0; i < BlockLiveness[*FI].LiveIn.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].LiveIn.test(i)<<" "); + + DEBUG(dbgs()<<"}\n"); + DEBUG(dbgs()<<"LIVEOUT: {"); + for (unsigned i=0; i < 
BlockLiveness[*FI].LiveOut.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].LiveOut.test(i)<<" "); + DEBUG(dbgs()<<"}\n"); + } +} + +unsigned StackColoring::collectMarkers(unsigned NumSlot) { + unsigned MarkersFound = 0; + // Scan the function to find all lifetime markers. + // NOTE: We use a depth-first iteration to ensure that we obtain a + // deterministic numbering, and because we'll need a post-order iteration + // later for solving the liveness dataflow problem. + for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); + FI != FE; ++FI) { + + // Assign a serial number to this basic block. + BasicBlocks[*FI] = BasicBlockNumbering.size(); + BasicBlockNumbering.push_back(*FI); + + BlockLiveness[*FI].Begin.resize(NumSlot); + BlockLiveness[*FI].End.resize(NumSlot); + + for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end(); + BI != BE; ++BI) { + + if (BI->getOpcode() != TargetOpcode::LIFETIME_START && + BI->getOpcode() != TargetOpcode::LIFETIME_END) + continue; + + Markers.push_back(BI); + + bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START; + MachineOperand &MI = BI->getOperand(0); + unsigned Slot = MI.getIndex(); + + MarkersFound++; + + const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<< + " with allocation: "<< Allocation->getName()<<"\n"); + } + + if (IsStart) { + BlockLiveness[*FI].Begin.set(Slot); + } else { + if (BlockLiveness[*FI].Begin.test(Slot)) { + // Allocas that start and end within a single block are handled + // specially when computing the LiveIntervals to avoid pessimizing + // the liveness propagation. + BlockLiveness[*FI].Begin.reset(Slot); + } else { + BlockLiveness[*FI].End.set(Slot); + } + } + } + } + + // Update statistics.
+ NumMarkerSeen += MarkersFound; + return MarkersFound; +} + +void StackColoring::calculateLocalLiveness() { + // Perform a standard reverse dataflow computation to solve for + // global liveness. The BEGIN set here is equivalent to KILL in the standard + // formulation, and END is equivalent to GEN. The result of this computation + // is a map from blocks to bitvectors where the bitvectors represent which + // allocas are live in/out of that block. + SmallPtrSet<MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(), + BasicBlockNumbering.end()); + unsigned NumSSMIters = 0; + bool changed = true; + while (changed) { + changed = false; + ++NumSSMIters; + + SmallPtrSet<MachineBasicBlock*, 8> NextBBSet; + + for (SmallVector<MachineBasicBlock*, 8>::iterator + PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); + PI != PE; ++PI) { + + MachineBasicBlock *BB = *PI; + if (!BBSet.count(BB)) continue; + + BitVector LocalLiveIn; + BitVector LocalLiveOut; + + // Forward propagation from begins to ends. + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + PE = BB->pred_end(); PI != PE; ++PI) + LocalLiveIn |= BlockLiveness[*PI].LiveOut; + LocalLiveIn |= BlockLiveness[BB].End; + LocalLiveIn.reset(BlockLiveness[BB].Begin); + + // Reverse propagation from ends to begins. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + LocalLiveOut |= BlockLiveness[*SI].LiveIn; + LocalLiveOut |= BlockLiveness[BB].Begin; + LocalLiveOut.reset(BlockLiveness[BB].End); + + LocalLiveIn |= LocalLiveOut; + LocalLiveOut |= LocalLiveIn; + + // After adopting the live bits, we need to turn-off the bits which + // are de-activated in this block. 
+ LocalLiveOut.reset(BlockLiveness[BB].End); + LocalLiveIn.reset(BlockLiveness[BB].Begin); + + // If we have both BEGIN and END markers in the same basic block then + // we know that the BEGIN marker comes after the END, because we already + // handle the case where the BEGIN comes before the END when collecting + // the markers (and building the BEGIN/END vectors). + // Want to enable the LIVE_IN and LIVE_OUT of slots that have both + // BEGIN and END because it means that the value lives before and after + // this basic block. + BitVector LocalEndBegin = BlockLiveness[BB].End; + LocalEndBegin &= BlockLiveness[BB].Begin; + LocalLiveIn |= LocalEndBegin; + LocalLiveOut |= LocalEndBegin; + + if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) { + changed = true; + BlockLiveness[BB].LiveIn |= LocalLiveIn; + + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + PE = BB->pred_end(); PI != PE; ++PI) + NextBBSet.insert(*PI); + } + + if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) { + changed = true; + BlockLiveness[BB].LiveOut |= LocalLiveOut; + + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + NextBBSet.insert(*SI); + } + } + + BBSet = NextBBSet; + }// while changed. +} + +void StackColoring::calculateLiveIntervals(unsigned NumSlots) { + SmallVector<SlotIndex, 16> Starts; + SmallVector<SlotIndex, 16> Finishes; + + // For each block, find which slots are active within this block + // and update the live intervals. + for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end(); + MBB != MBBe; ++MBB) { + Starts.clear(); + Starts.resize(NumSlots); + Finishes.clear(); + Finishes.resize(NumSlots); + + // Create the interval for the basic blocks with lifetime markers in them.
+ for (SmallVector<MachineInstr*, 8>::iterator it = Markers.begin(), + e = Markers.end(); it != e; ++it) { + MachineInstr *MI = *it; + if (MI->getParent() != MBB) + continue; + + assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || + MI->getOpcode() == TargetOpcode::LIFETIME_END) && + "Invalid Lifetime marker"); + + bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; + MachineOperand &Mo = MI->getOperand(0); + int Slot = Mo.getIndex(); + assert(Slot >= 0 && "Invalid slot"); + + SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); + + if (IsStart) { + if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + Starts[Slot] = ThisIndex; + } else { + if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) + Finishes[Slot] = ThisIndex; + } + } + + // Create the interval of the blocks that we previously found to be 'alive'. + BitVector Alive = BlockLiveness[MBB].LiveIn; + Alive |= BlockLiveness[MBB].LiveOut; + + if (Alive.any()) { + for (int pos = Alive.find_first(); pos != -1; + pos = Alive.find_next(pos)) { + if (!Starts[pos].isValid()) + Starts[pos] = Indexes->getMBBStartIdx(MBB); + if (!Finishes[pos].isValid()) + Finishes[pos] = Indexes->getMBBEndIdx(MBB); + } + } + + for (unsigned i = 0; i < NumSlots; ++i) { + assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range"); + if (!Starts[i].isValid()) + continue; + + assert(Starts[i] && Finishes[i] && "Invalid interval"); + VNInfo *ValNum = Intervals[i]->getValNumInfo(0); + SlotIndex S = Starts[i]; + SlotIndex F = Finishes[i]; + if (S < F) { + // We have a single consecutive region. + Intervals[i]->addRange(LiveRange(S, F, ValNum)); + } else { + // We have two non consecutive regions. This happens when + // LIFETIME_START appears after the LIFETIME_END marker. 
+ SlotIndex NewStart = Indexes->getMBBStartIdx(MBB); + SlotIndex NewFin = Indexes->getMBBEndIdx(MBB); + Intervals[i]->addRange(LiveRange(NewStart, F, ValNum)); + Intervals[i]->addRange(LiveRange(S, NewFin, ValNum)); + } + } + } +} + +bool StackColoring::removeAllMarkers() { + unsigned Count = 0; + for (unsigned i = 0; i < Markers.size(); ++i) { + Markers[i]->eraseFromParent(); + Count++; + } + Markers.clear(); + + DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n"); + return Count; +} + +void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { + unsigned FixedInstr = 0; + unsigned FixedMemOp = 0; + unsigned FixedDbg = 0; + MachineModuleInfo *MMI = &MF->getMMI(); + + // Remap debug information that refers to stack slots. + MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); + for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), + VE = VMap.end(); VI != VE; ++VI) { + const MDNode *Var = VI->first; + if (!Var) continue; + std::pair<unsigned, DebugLoc> &VP = VI->second; + if (SlotRemap.count(VP.first)) { + DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n"); + VP.first = SlotRemap[VP.first]; + FixedDbg++; + } + } + + // Keep a list of *allocas* which need to be remapped. + DenseMap<const AllocaInst*, const AllocaInst*> Allocas; + for (DenseMap<int, int>::iterator it = SlotRemap.begin(), + e = SlotRemap.end(); it != e; ++it) { + const AllocaInst *From = MFI->getObjectAllocation(it->first); + const AllocaInst *To = MFI->getObjectAllocation(it->second); + assert(To && From && "Invalid allocation object"); + Allocas[From] = To; + } + + // Remap all instructions to the new stack slots. + MachineFunction::iterator BB, BBE; + MachineBasicBlock::iterator I, IE; + for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) + for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { + + // Skip lifetime markers. We'll remove them soon. 
+ if (I->getOpcode() == TargetOpcode::LIFETIME_START || + I->getOpcode() == TargetOpcode::LIFETIME_END) + continue; + + // Update the MachineMemOperand to use the new alloca. + for (MachineInstr::mmo_iterator MM = I->memoperands_begin(), + E = I->memoperands_end(); MM != E; ++MM) { + MachineMemOperand *MMO = *MM; + + const Value *V = MMO->getValue(); + + if (!V) + continue; + + // Climb up and find the original alloca. + V = GetUnderlyingObject(V); + // If we did not find one, or if the one that we found is not in our + // map, then move on. + if (!V || !isa<AllocaInst>(V)) { + // Clear mem operand since we don't know for sure that it doesn't + // alias a merged alloca. + MMO->setValue(0); + continue; + } + const AllocaInst *AI= cast<AllocaInst>(V); + if (!Allocas.count(AI)) + continue; + + MMO->setValue(Allocas[AI]); + FixedMemOp++; + } + + // Update all of the machine instruction operands. + for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { + MachineOperand &MO = I->getOperand(i); + + if (!MO.isFI()) + continue; + int FromSlot = MO.getIndex(); + + // Don't touch arguments. + if (FromSlot<0) + continue; + + // Only look at mapped slots. + if (!SlotRemap.count(FromSlot)) + continue; + + // In a debug build, check that the instruction that we are modifying is + // inside the expected live range. If the instruction is not inside + // the calculated range then it means that the alloca usage moved + // outside of the lifetime markers, or that the user has a bug. + // NOTE: Alloca address calculations which happen outside the lifetime + // zone are are okay, despite the fact that we don't have a good way + // for validating all of the usages of the calculation. +#ifndef NDEBUG + bool TouchesMemory = I->mayLoad() || I->mayStore(); + // If we *don't* protect the user from escaped allocas, don't bother + // validating the instructions. 
+ if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) { + SlotIndex Index = Indexes->getInstructionIndex(I); + LiveInterval *Interval = Intervals[FromSlot]; + assert(Interval->find(Index) != Interval->end() && + "Found instruction usage outside of live range."); + } +#endif + + // Fix the machine instructions. + int ToSlot = SlotRemap[FromSlot]; + MO.setIndex(ToSlot); + FixedInstr++; + } + } + + DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n"); + DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n"); + DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n"); +} + +void StackColoring::removeInvalidSlotRanges() { + MachineFunction::iterator BB, BBE; + MachineBasicBlock::iterator I, IE; + for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) + for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { + + if (I->getOpcode() == TargetOpcode::LIFETIME_START || + I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue()) + continue; + + // Some intervals are suspicious! In some cases we find address + // calculations outside of the lifetime zone, but not actual memory + // read or write. Memory accesses outside of the lifetime zone are a clear + // violation, but address calculations are okay. This can happen when + // GEPs are hoisted outside of the lifetime zone. + // So, in here we only check instructions which can read or write memory. + if (!I->mayLoad() && !I->mayStore()) + continue; + + // Check all of the machine operands. + for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { + MachineOperand &MO = I->getOperand(i); + + if (!MO.isFI()) + continue; + + int Slot = MO.getIndex(); + + if (Slot<0) + continue; + + if (Intervals[Slot]->empty()) + continue; + + // Check that the used slot is inside the calculated lifetime range. + // If it is not, warn about it and invalidate the range. 
+ LiveInterval *Interval = Intervals[Slot]; + SlotIndex Index = Indexes->getInstructionIndex(I); + if (Interval->find(Index) == Interval->end()) { + Intervals[Slot]->clear(); + DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n"); + EscapedAllocas++; + } + } + } +} + +void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap, + unsigned NumSlots) { + // Expunge slot remap map. + for (unsigned i=0; i < NumSlots; ++i) { + // If we are remapping i + if (SlotRemap.count(i)) { + int Target = SlotRemap[i]; + // As long as our target is mapped to something else, follow it. + while (SlotRemap.count(Target)) { + Target = SlotRemap[Target]; + SlotRemap[i] = Target; + } + } + } +} + +bool StackColoring::runOnMachineFunction(MachineFunction &Func) { + DEBUG(dbgs() << "********** Stack Coloring **********\n" + << "********** Function: " + << ((const Value*)Func.getFunction())->getName() << '\n'); + MF = &Func; + MFI = MF->getFrameInfo(); + Indexes = &getAnalysis<SlotIndexes>(); + BlockLiveness.clear(); + BasicBlocks.clear(); + BasicBlockNumbering.clear(); + Markers.clear(); + Intervals.clear(); + VNInfoAllocator.Reset(); + + unsigned NumSlots = MFI->getObjectIndexEnd(); + + // If there are no stack slots then there are no markers to remove. + if (!NumSlots) + return false; + + SmallVector<int, 8> SortedSlots; + + SortedSlots.reserve(NumSlots); + Intervals.reserve(NumSlots); + + unsigned NumMarkers = collectMarkers(NumSlots); + + unsigned TotalSize = 0; + DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n"); + DEBUG(dbgs()<<"Slot structure:\n"); + + for (int i=0; i < MFI->getObjectIndexEnd(); ++i) { + DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n"); + TotalSize += MFI->getObjectSize(i); + } + + DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n"); + + // Don't continue because there are not enough lifetime markers, or the + // stack is too small, or we are told not to optimize the slots. 
+ if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) { + DEBUG(dbgs()<<"Will not try to merge slots.\n"); + return removeAllMarkers(); + } + + for (unsigned i=0; i < NumSlots; ++i) { + LiveInterval *LI = new LiveInterval(i, 0); + Intervals.push_back(LI); + LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator); + SortedSlots.push_back(i); + } + + // Calculate the liveness of each block. + calculateLocalLiveness(); + + // Propagate the liveness information. + calculateLiveIntervals(NumSlots); + + // Search for allocas which are used outside of the declared lifetime + // markers. + if (ProtectFromEscapedAllocas) + removeInvalidSlotRanges(); + + // Maps old slots to new slots. + DenseMap<int, int> SlotRemap; + unsigned RemovedSlots = 0; + unsigned ReducedSize = 0; + + // Do not bother looking at empty intervals. + for (unsigned I = 0; I < NumSlots; ++I) { + if (Intervals[SortedSlots[I]]->empty()) + SortedSlots[I] = -1; + } + + // This is a simple greedy algorithm for merging allocas. First, sort the + // slots, placing the largest slots first. Next, perform an n^2 scan and look + // for disjoint slots. When you find disjoint slots, merge the samller one + // into the bigger one and update the live interval. Remove the small alloca + // and continue. + + // Sort the slots according to their size. Place unused slots at the end. + std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI)); + + bool Chanded = true; + while (Chanded) { + Chanded = false; + for (unsigned I = 0; I < NumSlots; ++I) { + if (SortedSlots[I] == -1) + continue; + + for (unsigned J=I+1; J < NumSlots; ++J) { + if (SortedSlots[J] == -1) + continue; + + int FirstSlot = SortedSlots[I]; + int SecondSlot = SortedSlots[J]; + LiveInterval *First = Intervals[FirstSlot]; + LiveInterval *Second = Intervals[SecondSlot]; + assert (!First->empty() && !Second->empty() && "Found an empty range"); + + // Merge disjoint slots. 
+ if (!First->overlaps(*Second)) { + Chanded = true; + First->MergeRangesInAsValue(*Second, First->getValNumInfo(0)); + SlotRemap[SecondSlot] = FirstSlot; + SortedSlots[J] = -1; + DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<< + SecondSlot<<" together.\n"); + unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot), + MFI->getObjectAlignment(SecondSlot)); + + assert(MFI->getObjectSize(FirstSlot) >= + MFI->getObjectSize(SecondSlot) && + "Merging a small object into a larger one"); + + RemovedSlots+=1; + ReducedSize += MFI->getObjectSize(SecondSlot); + MFI->setObjectAlignment(FirstSlot, MaxAlignment); + MFI->RemoveStackObject(SecondSlot); + } + } + } + }// While changed. + + // Record statistics. + StackSpaceSaved += ReducedSize; + StackSlotMerged += RemovedSlots; + DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<< + ReducedSize<<" bytes\n"); + + // Scan the entire function and update all machine operands that use frame + // indices to use the remapped frame index. + expungeSlotMap(SlotRemap, NumSlots); + remapInstructions(SlotRemap); + + // Release the intervals. + for (unsigned I = 0; I < NumSlots; ++I) { + delete Intervals[I]; + } + + return removeAllMarkers(); +} diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index f1eab1f..31e9ec0 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -26,18 +26,12 @@ #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/ADT/Triple.h" using namespace llvm; -// SSPBufferSize - The lower bound for a buffer to be considered for stack -// smashing protection. 
-static cl::opt<unsigned> -SSPBufferSize("stack-protector-buffer-size", cl::init(8), - cl::desc("Lower bound for a buffer to be considered for " - "stack protection")); - namespace { class StackProtector : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -61,6 +55,11 @@ namespace { /// check fails. BasicBlock *CreateFailBB(); + /// ContainsProtectableArray - Check whether the type either is an array or + /// contains an array of sufficient size so that we need stack protectors + /// for it. + bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const; + /// RequiresStackProtector - Check whether or not this function needs a /// stack protector based upon the stack protector level. bool RequiresStackProtector() const; @@ -100,21 +99,50 @@ bool StackProtector::runOnFunction(Function &Fn) { return InsertStackProtectors(); } +/// ContainsProtectableArray - Check whether the type either is an array or +/// contains a char array of sufficient size so that we need stack protectors +/// for it. +bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const { + if (!Ty) return false; + if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + const TargetMachine &TM = TLI->getTargetMachine(); + if (!AT->getElementType()->isIntegerTy(8)) { + Triple Trip(TM.getTargetTriple()); + + // If we're on a non-Darwin platform or we're inside of a structure, don't + // add stack protectors unless the array is a character array. + if (InStruct || !Trip.isOSDarwin()) + return false; + } + + // If an array has more than SSPBufferSize bytes of allocated space, then we + // emit stack protectors. 
+ if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) + return true; + } + + const StructType *ST = dyn_cast<StructType>(Ty); + if (!ST) return false; + + for (StructType::element_iterator I = ST->element_begin(), + E = ST->element_end(); I != E; ++I) + if (ContainsProtectableArray(*I, true)) + return true; + + return false; +} + /// RequiresStackProtector - Check whether or not this function needs a stack /// protector based upon the stack protector level. The heuristic we use is to /// add a guard variable to functions that call alloca, and functions with /// buffers larger than SSPBufferSize bytes. bool StackProtector::RequiresStackProtector() const { - if (F->hasFnAttr(Attribute::StackProtectReq)) + if (F->getFnAttributes().hasAttribute(Attributes::StackProtectReq)) return true; - if (!F->hasFnAttr(Attribute::StackProtect)) + if (!F->getFnAttributes().hasAttribute(Attributes::StackProtect)) return false; - const TargetData *TD = TLI->getTargetData(); - const TargetMachine &TM = TLI->getTargetMachine(); - Triple Trip(TM.getTargetTriple()); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; @@ -126,17 +154,8 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { - // If we're on a non-Darwin platform, don't add stack protectors - // unless the array is a character array. - if (!Trip.isOSDarwin() && !AT->getElementType()->isIntegerTy(8)) - continue; - - // If an array has more than SSPBufferSize bytes of allocated space, - // then we emit stack protectors. 
- if (SSPBufferSize <= TD->getTypeAllocSize(AT)) - return true; - } + if (ContainsProtectableArray(AI->getAllocatedType())) + return true; } } diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index 20da36e..d349abc 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -11,8 +11,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stackcoloring" -#include "llvm/Function.h" +#define DEBUG_TYPE "stackslotcoloring" #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -391,8 +390,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Stack Slot Coloring **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'; + << "********** Function: " << MF.getName() << '\n'; }); MFI = MF.getFrameInfo(); diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp index 5b06195..39fd600 100644 --- a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp @@ -404,9 +404,9 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { } void StrongPHIElimination::addReg(unsigned Reg) { - if (RegNodeMap.count(Reg)) - return; - RegNodeMap[Reg] = new (Allocator) Node(Reg); + Node *&N = RegNodeMap[Reg]; + if (!N) + N = new (Allocator) Node(Reg); } StrongPHIElimination::Node* @@ -714,8 +714,9 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, assert(getRegColor(CopyReg) == CopyReg); } - if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor))) - InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr; + // Insert into map if not already there. 
+ InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor), + CopyInstr)); } SrcMO.setReg(CopyReg); diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index a813fa6..1497d1b 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -552,7 +552,8 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && - MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + MF.getFunction()->getFnAttributes(). + hasAttribute(Attributes::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp index ddee6b2..4439192 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -99,17 +99,8 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, if (NewMI) { // Create a new instruction. - bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false; MachineFunction &MF = *MI->getParent()->getParent(); - if (HasDef) - return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0) - .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) - .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); - else - return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) - .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); + MI = MF.CloneMachineInstr(MI); } if (HasDef) { @@ -572,6 +563,8 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData, /// Return the default expected latency for a def based on it's opcode. 
unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, const MachineInstr *DefMI) const { + if (DefMI->isTransient()) + return 0; if (DefMI->mayLoad()) return SchedModel->LoadLatency; if (isHighLatencyDef(DefMI->getOpcode())) @@ -615,13 +608,13 @@ getOperandLatency(const InstrItineraryData *ItinData, /// If we can determine the operand latency from the def only, without itinerary /// lookup, do so. Otherwise return -1. -static int computeDefOperandLatency( - const TargetInstrInfo *TII, const InstrItineraryData *ItinData, - const MachineInstr *DefMI, bool FindMin) { +int TargetInstrInfo::computeDefOperandLatency( + const InstrItineraryData *ItinData, + const MachineInstr *DefMI, bool FindMin) const { // Let the target hook getInstrLatency handle missing itineraries. if (!ItinData) - return TII->getInstrLatency(ItinData, DefMI); + return getInstrLatency(ItinData, DefMI); // Return a latency based on the itinerary properties and defining instruction // if possible. Some common subtargets don't require per-operand latency, @@ -630,7 +623,7 @@ static int computeDefOperandLatency( // If MinLatency is valid, call getInstrLatency. This uses Stage latency if // it exists before defaulting to MinLatency. if (ItinData->SchedModel->MinLatency >= 0) - return TII->getInstrLatency(ItinData, DefMI); + return getInstrLatency(ItinData, DefMI); // If MinLatency is invalid, OperandLatency is interpreted as MinLatency. // For empty itineraries, short-cirtuit the check and default to one cycle. @@ -638,29 +631,42 @@ static int computeDefOperandLatency( return 1; } else if(ItinData->isEmpty()) - return TII->defaultDefLatency(ItinData->SchedModel, DefMI); + return defaultDefLatency(ItinData->SchedModel, DefMI); // ...operand lookup required return -1; } /// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use when the operand indices are already known. +/// dependent def and use when the operand indices are already known. 
UseMI may +/// be NULL for an unknown use. +/// +/// FindMin may be set to get the minimum vs. expected latency. Minimum +/// latency is used for scheduling groups, while expected latency is for +/// instruction cost and critical path. /// -/// FindMin may be set to get the minimum vs. expected latency. +/// Depending on the subtarget's itinerary properties, this may or may not need +/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or +/// UseIdx to compute min latency. unsigned TargetInstrInfo:: computeOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx, bool FindMin) const { - int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin); + int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin); if (DefLatency >= 0) return DefLatency; assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + int OperLatency = 0; + if (UseMI) + OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); + } if (OperLatency >= 0) return OperLatency; @@ -673,77 +679,3 @@ computeOperandLatency(const InstrItineraryData *ItinData, defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } - -/// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an -/// unknown use. Depending on the subtarget's itinerary properties, this may or -/// may not need to call getOperandLatency(). -/// -/// FindMin may be set to get the minimum vs. expected latency. Minimum -/// latency is used for scheduling groups, while expected latency is for -/// instruction cost and critical path. 
-/// -/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency. -/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use. -unsigned TargetInstrInfo:: -computeOperandLatency(const InstrItineraryData *ItinData, - const TargetRegisterInfo *TRI, - const MachineInstr *DefMI, const MachineInstr *UseMI, - unsigned Reg, bool FindMin) const { - - int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin); - if (DefLatency >= 0) - return DefLatency; - - assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - - // Find the definition of the register in the defining instruction. - int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); - if (DefIdx != -1) { - const MachineOperand &MO = DefMI->getOperand(DefIdx); - if (MO.isReg() && MO.isImplicit() && - DefIdx >= (int)DefMI->getDesc().getNumOperands()) { - // This is an implicit def, getOperandLatency() won't return the correct - // latency. e.g. - // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def> - // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... - // What we want is to compute latency between def of %D6/%D7 and use of - // %Q3 instead. - unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); - if (DefMI->getOperand(Op2).isReg()) - DefIdx = Op2; - } - // For all uses of the register, calculate the maxmimum latency - int OperLatency = -1; - - // UseMI is null, then it must be a scheduling barrier. 
- if (!UseMI) { - unsigned DefClass = DefMI->getDesc().getSchedClass(); - OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); - } - else { - for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = UseMI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (MOReg != Reg) - continue; - - int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i); - OperLatency = std::max(OperLatency, UseCycle); - } - } - // If we found an operand latency, we're done. - if (OperLatency >= 0) - return OperLatency; - } - // No operand latency was found. - unsigned InstrLatency = getInstrLatency(ItinData, DefMI); - - // Expected latency is the max of the stage latency and itinerary props. - if (!FindMin) - InstrLatency = std::max(InstrLatency, - defaultDefLatency(ItinData->SchedModel, DefMI)); - return InstrLatency; -} diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 2a2fa9e..8f5d770 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -27,7 +27,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Dwarf.h" @@ -77,9 +77,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Flags, SectionKind::getDataRel(), 0, Label->getName()); - unsigned Size = TM.getTargetData()->getPointerSize(); + unsigned Size = TM.getDataLayout()->getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(TM.getTargetData()->getPointerABIAlignment()); + Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); 
const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); @@ -247,7 +247,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: this is getting the alignment of the character, not the // alignment of the global! unsigned Align = - TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)); const char *SizeSpec = ".rodata.str1."; if (Kind.isMergeable2ByteCString()) @@ -522,14 +522,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) return UStringSection; if (Kind.isMergeableConst()) { diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp new file mode 100644 index 0000000..ca3b0e0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -0,0 +1,306 @@ +//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements a wrapper around MCSchedModel that allows the interface +// to benefit from information currently only available in TargetInstrInfo. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), + cl::desc("Use TargetSchedModel for latency lookup")); + +static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true), + cl::desc("Use InstrItineraryData for latency lookup")); + +bool TargetSchedModel::hasInstrSchedModel() const { + return EnableSchedModel && SchedModel.hasInstrSchedModel(); +} + +bool TargetSchedModel::hasInstrItineraries() const { + return EnableSchedItins && !InstrItins.isEmpty(); +} + +static unsigned gcd(unsigned Dividend, unsigned Divisor) { + // Dividend and Divisor will be naturally swapped as needed. 
+ while(Divisor) { + unsigned Rem = Dividend % Divisor; + Dividend = Divisor; + Divisor = Rem; + }; + return Dividend; +} +static unsigned lcm(unsigned A, unsigned B) { + unsigned LCM = (uint64_t(A) * B) / gcd(A, B); + assert((LCM >= A && LCM >= B) && "LCM overflow"); + return LCM; +} + +void TargetSchedModel::init(const MCSchedModel &sm, + const TargetSubtargetInfo *sti, + const TargetInstrInfo *tii) { + SchedModel = sm; + STI = sti; + TII = tii; + STI->initInstrItins(InstrItins); + + unsigned NumRes = SchedModel.getNumProcResourceKinds(); + ResourceFactors.resize(NumRes); + ResourceLCM = SchedModel.IssueWidth; + for (unsigned Idx = 0; Idx < NumRes; ++Idx) { + unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; + if (NumUnits > 0) + ResourceLCM = lcm(ResourceLCM, NumUnits); + } + MicroOpFactor = ResourceLCM / SchedModel.IssueWidth; + for (unsigned Idx = 0; Idx < NumRes; ++Idx) { + unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; + ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0; + } +} + +unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, + const MCSchedClassDesc *SC) const { + if (hasInstrItineraries()) { + int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass()); + return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI); + } + if (hasInstrSchedModel()) { + if (!SC) + SC = resolveSchedClass(MI); + if (SC->isValid()) + return SC->NumMicroOps; + } + return MI->isTransient() ? 0 : 1; +} + +// The machine model may explicitly specify an invalid latency, which +// effectively means infinite latency. Since users of the TargetSchedule API +// don't know how to handle this, we convert it to a very large latency that is +// easy to distinguish when debugging the DAG but won't induce overflow. +static unsigned convertLatency(int Cycles) { + return Cycles >= 0 ? Cycles : 1000; +} + +/// If we can determine the operand latency from the def only, without machine +/// model or itinerary lookup, do so. 
Otherwise return -1. +int TargetSchedModel::getDefLatency(const MachineInstr *DefMI, + bool FindMin) const { + + // Return a latency based on the itinerary properties and defining instruction + // if possible. Some common subtargets don't require per-operand latency, + // especially for minimum latencies. + if (FindMin) { + // If MinLatency is invalid, then use the itinerary for MinLatency. If no + // itinerary exists either, then use single cycle latency. + if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) { + return 1; + } + return SchedModel.MinLatency; + } + else if (!hasInstrSchedModel() && !hasInstrItineraries()) { + return TII->defaultDefLatency(&SchedModel, DefMI); + } + // ...operand lookup required + return -1; +} + +/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require +/// evaluation of predicates that depend on instruction operands or flags. +const MCSchedClassDesc *TargetSchedModel:: +resolveSchedClass(const MachineInstr *MI) const { + + // Get the definition's scheduling class descriptor from this machine model. + unsigned SchedClass = MI->getDesc().getSchedClass(); + const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); + +#ifndef NDEBUG + unsigned NIter = 0; +#endif + while (SCDesc->isVariant()) { + assert(++NIter < 6 && "Variants are nested deeper than the magic number"); + + SchedClass = STI->resolveSchedClass(SchedClass, MI, this); + SCDesc = SchedModel.getSchedClassDesc(SchedClass); + } + return SCDesc; +} + +/// Find the def index of this operand. This index maps to the machine model and +/// is independent of use operands. Def operands may be reordered with uses or +/// merged with uses without affecting the def index (e.g. before/after +/// regalloc). However, an instruction's def operands must never be reordered +/// with respect to each other. 
+static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) { + unsigned DefIdx = 0; + for (unsigned i = 0; i != DefOperIdx; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) + ++DefIdx; + } + return DefIdx; +} + +/// Find the use index of this operand. This is independent of the instruction's +/// def operands. +/// +/// Note that uses are not determined by the operand's isUse property, which +/// is simply the inverse of isDef. Here we consider any readsReg operand to be +/// a "use". The machine model allows an operand to be both a Def and Use. +static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { + unsigned UseIdx = 0; + for (unsigned i = 0; i != UseOperIdx; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.readsReg()) + ++UseIdx; + } + return UseIdx; +} + +// Top-level API for clients that know the operand indices. +unsigned TargetSchedModel::computeOperandLatency( + const MachineInstr *DefMI, unsigned DefOperIdx, + const MachineInstr *UseMI, unsigned UseOperIdx, + bool FindMin) const { + + int DefLatency = getDefLatency(DefMI, FindMin); + if (DefLatency >= 0) + return DefLatency; + + if (hasInstrItineraries()) { + int OperLatency = 0; + if (UseMI) { + OperLatency = + TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx); + } + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx); + } + if (OperLatency >= 0) + return OperLatency; + + // No operand latency was found. + unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI); + + // Expected latency is the max of the stage latency and itinerary props. + // Rather than directly querying InstrItins stage latency, we call a TII + // hook to allow subtargets to specialize latency. This hook is only + // applicable to the InstrItins model. InstrSchedModel should model all + // special cases without TII hooks. 
+ if (!FindMin) + InstrLatency = std::max(InstrLatency, + TII->defaultDefLatency(&SchedModel, DefMI)); + return InstrLatency; + } + assert(!FindMin && hasInstrSchedModel() && + "Expected a SchedModel for this cpu"); + const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); + unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); + if (DefIdx < SCDesc->NumWriteLatencyEntries) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + unsigned WriteID = WLEntry->WriteResourceID; + unsigned Latency = convertLatency(WLEntry->Cycles); + if (!UseMI) + return Latency; + + // Lookup the use's latency adjustment in SubtargetInfo. + const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI); + if (UseDesc->NumReadAdvanceEntries == 0) + return Latency; + unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); + return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + } + // If DefIdx does not exist in the model (e.g. implicit defs), then return + // unit latency (defaultDefLatency may be too conservative). +#ifndef NDEBUG + if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() + && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) { + std::string Err; + raw_string_ostream ss(Err); + ss << "DefIdx " << DefIdx << " exceeds machine model writes for " + << *DefMI; + report_fatal_error(ss.str()); + } +#endif + return DefMI->isTransient() ? 0 : 1; +} + +unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { + // For the itinerary model, fall back to the old subtarget hook. + // Allow subtargets to compute Bundle latencies outside the machine model. 
+ if (hasInstrItineraries() || MI->isBundle()) + return TII->getInstrLatency(&InstrItins, MI); + + if (hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); + if (SCDesc->isValid()) { + unsigned Latency = 0; + for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; + DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + Latency = std::max(Latency, convertLatency(WLEntry->Cycles)); + } + return Latency; + } + } + return TII->defaultDefLatency(&SchedModel, MI); +} + +unsigned TargetSchedModel:: +computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, + const MachineInstr *DepMI) const { + // MinLatency == -1 is for in-order processors that always have unit + // MinLatency. MinLatency > 0 is for in-order processors with varying min + // latencies, but since this is not a RAW dep, we always use unit latency. + if (SchedModel.MinLatency != 0) + return 1; + + // MinLatency == 0 indicates an out-of-order processor that can dispatch + // WAW dependencies in the same cycle. + + // Treat predication as a data dependency for out-of-order cpus. In-order + // cpus do not need to treat predicated writes specially. + // + // TODO: The following hack exists because predication passes do not + // correctly append imp-use operands, and readsReg() strangely returns false + // for predicated defs. + unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); + const MachineFunction &MF = *DefMI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI)) + return computeInstrLatency(DefMI); + + // If we have a per operand scheduling model, check if this def is writing + // an unbuffered resource. If so, it treated like an in-order cpu. 
+ if (hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); + if (SCDesc->isValid()) { + for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc), + *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) { + if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered) + return 1; + } + } + } + return 0; +} diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index aa601af..a9058bc 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -60,116 +60,108 @@ STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { - class TwoAddressInstructionPass : public MachineFunctionPass { - MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const InstrItineraryData *InstrItins; - MachineRegisterInfo *MRI; - LiveVariables *LV; - SlotIndexes *Indexes; - LiveIntervals *LIS; - AliasAnalysis *AA; - CodeGenOpt::Level OptLevel; - - // DistanceMap - Keep track the distance of a MI from the start of the - // current basic block. - DenseMap<MachineInstr*, unsigned> DistanceMap; - - // SrcRegMap - A map from virtual registers to physical registers which - // are likely targets to be coalesced to due to copies from physical - // registers to virtual registers. e.g. v1024 = move r0. - DenseMap<unsigned, unsigned> SrcRegMap; - - // DstRegMap - A map from virtual registers to physical registers which - // are likely targets to be coalesced to due to copies to physical - // registers from virtual registers. e.g. r1 = move v1024. - DenseMap<unsigned, unsigned> DstRegMap; - - /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen - /// during the initial walk of the machine function. 
- SmallVector<MachineInstr*, 16> RegSequences; - - bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI, - unsigned Reg, - MachineBasicBlock::iterator OldPos); - - bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist, - unsigned &LastDef); - - bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, - MachineInstr *MI, MachineBasicBlock *MBB, - unsigned Dist); +class TwoAddressInstructionPass : public MachineFunctionPass { + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; + MachineRegisterInfo *MRI; + LiveVariables *LV; + SlotIndexes *Indexes; + LiveIntervals *LIS; + AliasAnalysis *AA; + CodeGenOpt::Level OptLevel; + + // The current basic block being processed. + MachineBasicBlock *MBB; + + // DistanceMap - Keep track the distance of a MI from the start of the + // current basic block. + DenseMap<MachineInstr*, unsigned> DistanceMap; + + // Set of already processed instructions in the current block. + SmallPtrSet<MachineInstr*, 8> Processed; - bool CommuteInstruction(MachineBasicBlock::iterator &mi, - MachineFunction::iterator &mbbi, - unsigned RegB, unsigned RegC, unsigned Dist); + // SrcRegMap - A map from virtual registers to physical registers which are + // likely targets to be coalesced to due to copies from physical registers to + // virtual registers. e.g. v1024 = move r0. + DenseMap<unsigned, unsigned> SrcRegMap; - bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); + // DstRegMap - A map from virtual registers to physical registers which are + // likely targets to be coalesced to due to copies to physical registers from + // virtual registers. e.g. r1 = move v1024. 
+ DenseMap<unsigned, unsigned> DstRegMap; - bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned RegA, unsigned RegB, unsigned Dist); + /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen + /// during the initial walk of the machine function. + SmallVector<MachineInstr*, 16> RegSequences; - bool isDefTooClose(unsigned Reg, unsigned Dist, - MachineInstr *MI, MachineBasicBlock *MBB); + bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, + MachineBasicBlock::iterator OldPos); - bool RescheduleMIBelowKill(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg); - bool RescheduleKillAboveMI(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg); + bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef); - bool TryInstructionTransform(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned SrcIdx, unsigned DstIdx, - unsigned Dist, - SmallPtrSet<MachineInstr*, 8> &Processed); + bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, + MachineInstr *MI, unsigned Dist); - void ScanUses(unsigned DstReg, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &Processed); + bool commuteInstruction(MachineBasicBlock::iterator &mi, + unsigned RegB, unsigned RegC, unsigned Dist); - void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &Processed); + bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); - typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; - typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; - bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); - void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + bool convertInstTo3Addr(MachineBasicBlock::iterator &mi, + 
MachineBasicBlock::iterator &nmi, + unsigned RegA, unsigned RegB, unsigned Dist); - void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg); + bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI); - /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part - /// of the de-ssa process. This replaces sources of REG_SEQUENCE as - /// sub-register references of the register defined by REG_SEQUENCE. - bool EliminateRegSequences(); + bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); - public: - static char ID; // Pass identification, replacement for typeid - TwoAddressInstructionPass() : MachineFunctionPass(ID) { - initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); - } + bool tryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist); - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<LiveVariables>(); - AU.addPreserved<SlotIndexes>(); - AU.addPreserved<LiveIntervals>(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } + void scanUses(unsigned DstReg); - /// runOnMachineFunction - Pass entry point. - bool runOnMachineFunction(MachineFunction&); - }; -} + void processCopy(MachineInstr *MI); + + typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; + typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); + void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + + /// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part of + /// the de-ssa process. 
This replaces sources of REG_SEQUENCE as sub-register + /// references of the register defined by REG_SEQUENCE. + bool eliminateRegSequences(); + +public: + static char ID; // Pass identification, replacement for typeid + TwoAddressInstructionPass() : MachineFunctionPass(ID) { + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - Pass entry point. + bool runOnMachineFunction(MachineFunction&); +}; +} // end anonymous namespace char TwoAddressInstructionPass::ID = 0; INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", @@ -180,13 +172,13 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; -/// Sink3AddrInstruction - A two-address instruction has been converted to a +/// sink3AddrInstruction - A two-address instruction has been converted to a /// three-address instruction to avoid clobbering a register. Try to sink it /// past the instruction that would kill the above mentioned register to reduce /// register pressure. -bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, - MachineInstr *MI, unsigned SavedReg, - MachineBasicBlock::iterator OldPos) { +bool TwoAddressInstructionPass:: +sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, + MachineBasicBlock::iterator OldPos) { // FIXME: Shouldn't we be trying to do this before we three-addressify the // instruction? After this transformation is done, we no longer need // the instruction to be in three-address form. 
@@ -299,13 +291,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, return true; } -/// NoUseAfterLastDef - Return true if there are no intervening uses between the +/// noUseAfterLastDef - Return true if there are no intervening uses between the /// last instruction in the MBB that defines the specified register and the /// two-address instruction which is being processed. It also returns the last /// def location by reference -bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, - MachineBasicBlock *MBB, unsigned Dist, - unsigned &LastDef) { +bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, + unsigned &LastDef) { LastDef = 0; unsigned LastUse = Dist; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), @@ -465,10 +456,9 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { /// isProfitableToCommute - Return true if it's potentially profitable to commute /// the two-address instruction that's being processed. bool -TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, - unsigned regC, - MachineInstr *MI, MachineBasicBlock *MBB, - unsigned Dist) { +TwoAddressInstructionPass:: +isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, + MachineInstr *MI, unsigned Dist) { if (OptLevel == CodeGenOpt::None) return false; @@ -516,13 +506,13 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, // If there is a use of regC between its last def (could be livein) and this // instruction, then bail. unsigned LastDefC = 0; - if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC)) + if (!noUseAfterLastDef(regC, Dist, LastDefC)) return false; // If there is a use of regB between its last def (could be livein) and this // instruction, then go ahead and make this transformation. 
unsigned LastDefB = 0; - if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB)) + if (!noUseAfterLastDef(regB, Dist, LastDefB)) return true; // Since there are no intervening uses for both registers, then commute @@ -530,13 +520,12 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, return LastDefB && LastDefC && LastDefC > LastDefB; } -/// CommuteInstruction - Commute a two-address instruction and update the basic +/// commuteInstruction - Commute a two-address instruction and update the basic /// block, distance map, and live variables if needed. Return true if it is /// successful. -bool -TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, - MachineFunction::iterator &mbbi, - unsigned RegB, unsigned RegC, unsigned Dist) { +bool TwoAddressInstructionPass:: +commuteInstruction(MachineBasicBlock::iterator &mi, + unsigned RegB, unsigned RegC, unsigned Dist) { MachineInstr *MI = mi; DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(MI); @@ -555,8 +544,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, if (Indexes) Indexes->replaceMachineInstrInMaps(MI, NewMI); - mbbi->insert(mi, NewMI); // Insert the new inst - mbbi->erase(mi); // Nuke the old inst. + MBB->insert(mi, NewMI); // Insert the new inst + MBB->erase(mi); // Nuke the old inst. mi = NewMI; DistanceMap.insert(std::make_pair(NewMI, Dist)); } @@ -588,51 +577,51 @@ TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI)); } -/// ConvertInstTo3Addr - Convert the specified two-address instruction into a +/// convertInstTo3Addr - Convert the specified two-address instruction into a /// three address one. Return true if this transformation was successful. 
bool -TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, +TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, unsigned RegA, unsigned RegB, unsigned Dist) { - MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV); - if (NewMI) { - DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); - DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); - bool Sunk = false; + // FIXME: Why does convertToThreeAddress() need an iterator reference? + MachineFunction::iterator MFI = MBB; + MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV); + assert(MBB == MFI && "convertToThreeAddress changed iterator reference"); + if (!NewMI) + return false; - if (Indexes) - Indexes->replaceMachineInstrInMaps(mi, NewMI); + DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); + DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); + bool Sunk = false; - if (NewMI->findRegisterUseOperand(RegB, false, TRI)) - // FIXME: Temporary workaround. If the new instruction doesn't - // uses RegB, convertToThreeAddress must have created more - // then one instruction. - Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi); + if (Indexes) + Indexes->replaceMachineInstrInMaps(mi, NewMI); - mbbi->erase(mi); // Nuke the old inst. + if (NewMI->findRegisterUseOperand(RegB, false, TRI)) + // FIXME: Temporary workaround. If the new instruction doesn't + // uses RegB, convertToThreeAddress must have created more + // then one instruction. + Sunk = sink3AddrInstruction(NewMI, RegB, mi); - if (!Sunk) { - DistanceMap.insert(std::make_pair(NewMI, Dist)); - mi = NewMI; - nmi = llvm::next(mi); - } + MBB->erase(mi); // Nuke the old inst. - // Update source and destination register maps. 
- SrcRegMap.erase(RegA); - DstRegMap.erase(RegB); - return true; + if (!Sunk) { + DistanceMap.insert(std::make_pair(NewMI, Dist)); + mi = NewMI; + nmi = llvm::next(mi); } - return false; + // Update source and destination register maps. + SrcRegMap.erase(RegA); + DstRegMap.erase(RegB); + return true; } -/// ScanUses - Scan forward recursively for only uses, update maps if the use +/// scanUses - Scan forward recursively for only uses, update maps if the use /// is a copy or a two-address instruction. void -TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &Processed) { +TwoAddressInstructionPass::scanUses(unsigned DstReg) { SmallVector<unsigned, 4> VirtRegPairs; bool IsDstPhys; bool IsCopy = false; @@ -676,7 +665,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, } } -/// ProcessCopy - If the specified instruction is not yet processed, process it +/// processCopy - If the specified instruction is not yet processed, process it /// if it's a copy. For a copy instruction, we find the physical registers the /// source and destination registers might be mapped to. These are kept in /// point-to maps used to determine future optimizations. e.g. @@ -688,9 +677,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, /// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is /// potentially joined with r1 on the output side. It's worthwhile to commute /// 'add' to eliminate a copy. 
-void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, - MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &Processed) { +void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { if (Processed.count(MI)) return; @@ -707,21 +694,20 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, assert(SrcRegMap[DstReg] == SrcReg && "Can't map to two src physical registers!"); - ScanUses(DstReg, MBB, Processed); + scanUses(DstReg); } Processed.insert(MI); return; } -/// RescheduleMIBelowKill - If there is one more local instruction that reads +/// rescheduleMIBelowKill - If there is one more local instruction that reads /// 'Reg' and it kills 'Reg, consider moving the instruction below the kill /// instruction in order to eliminate the need for the copy. -bool -TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg) { +bool TwoAddressInstructionPass:: +rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { // Bail immediately if we don't have LV available. We use it to find kills // efficiently. if (!LV) @@ -853,8 +839,7 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, /// isDefTooClose - Return true if the re-scheduling will put the given /// instruction too close to the defs of its register dependencies. 
bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, - MachineInstr *MI, - MachineBasicBlock *MBB) { + MachineInstr *MI) { for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { MachineInstr *DefMI = &*DI; @@ -873,15 +858,14 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, return false; } -/// RescheduleKillAboveMI - If there is one more local instruction that reads +/// rescheduleKillAboveMI - If there is one more local instruction that reads /// 'Reg' and it kills 'Reg, consider moving the kill instruction above the /// current two-address instruction in order to eliminate the need for the /// copy. -bool -TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg) { +bool TwoAddressInstructionPass:: +rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { // Bail immediately if we don't have LV available. We use it to find kills // efficiently. if (!LV) @@ -918,7 +902,7 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, if (MO.isUse()) { if (!MOReg) continue; - if (isDefTooClose(MOReg, DI->second, MI, MBB)) + if (isDefTooClose(MOReg, DI->second, MI)) return false; if (MOReg == Reg && !MO.isKill()) return false; @@ -1006,18 +990,16 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, return true; } -/// TryInstructionTransform - For the case where an instruction has a single +/// tryInstructionTransform - For the case where an instruction has a single /// pair of tied register operands, attempt some transformations that may /// either eliminate the tied operands or improve the opportunities for /// coalescing away the register copy. 
Returns true if no copy needs to be /// inserted to untie mi's operands (either because they were untied, or /// because mi was rescheduled, and will be visited again later). bool TwoAddressInstructionPass:: -TryInstructionTransform(MachineBasicBlock::iterator &mi, +tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned SrcIdx, unsigned DstIdx, unsigned Dist, - SmallPtrSet<MachineInstr*, 8> &Processed) { + unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { if (OptLevel == CodeGenOpt::None) return false; @@ -1030,7 +1012,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, bool regBKilled = isKilled(MI, regB, MRI, TII); if (TargetRegisterInfo::isVirtualRegister(regA)) - ScanUses(regA, &*mbbi, Processed); + scanUses(regA); // Check if it is profitable to commute the operands. unsigned SrcOp1, SrcOp2; @@ -1051,7 +1033,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; - else if (isProfitableToCommute(regA, regB, regC, &MI, mbbi, Dist)) { + else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) { TryCommute = true; AggressiveCommute = true; } @@ -1059,7 +1041,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } // If it's profitable to commute, try to do so. - if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) { + if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) { ++NumCommuted; if (AggressiveCommute) ++NumAggrCommuted; @@ -1068,7 +1050,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. 
- if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) { + if (rescheduleMIBelowKill(mi, nmi, regB)) { ++NumReSchedDowns; return true; } @@ -1078,7 +1060,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { // Try to convert it. - if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) { + if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) { ++NumConvertedTo3Addr; return true; // Done with this instruction. } @@ -1087,7 +1069,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule it before this MI if it's legal. - if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) { + if (rescheduleKillAboveMI(mi, nmi, regB)) { ++NumReSchedUps; return true; } @@ -1131,8 +1113,8 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // Tentatively insert the instructions into the block so that they // look "normal" to the transformation logic. - mbbi->insert(mi, NewMIs[0]); - mbbi->insert(mi, NewMIs[1]); + MBB->insert(mi, NewMIs[0]); + MBB->insert(mi, NewMIs[1]); DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] << "2addr: NEW INST: " << *NewMIs[1]); @@ -1142,8 +1124,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); MachineBasicBlock::iterator NewMI = NewMIs[1]; bool TransformSuccess = - TryInstructionTransform(NewMI, mi, mbbi, - NewSrcIdx, NewDstIdx, Dist, Processed); + tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist); if (TransformSuccess || NewMIs[1]->getOperand(NewSrcIdx).isKill()) { // Success, or at least we made an improvement. Keep the unfolded @@ -1202,8 +1183,7 @@ bool TwoAddressInstructionPass:: collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { const MCInstrDesc &MCID = MI->getDesc(); bool AnyOps = false; - unsigned NumOps = MI->isInlineAsm() ? 
- MI->getNumOperands() : MCID.getNumOperands(); + unsigned NumOps = MI->getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; @@ -1373,22 +1353,21 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(dbgs() << "********** Function: " - << MF->getFunction()->getName() << '\n'); + << MF->getName() << '\n'); // This pass takes the function out of SSA form. MRI->leaveSSA(); TiedOperandMap TiedOperands; - - SmallPtrSet<MachineInstr*, 8> Processed; - for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); - mbbi != mbbe; ++mbbi) { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MBB = MBBI; unsigned Dist = 0; DistanceMap.clear(); SrcRegMap.clear(); DstRegMap.clear(); Processed.clear(); - for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end(); mi != me; ) { MachineBasicBlock::iterator nmi = llvm::next(mi); if (mi->isDebugValue()) { @@ -1402,7 +1381,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { DistanceMap.insert(std::make_pair(mi, ++Dist)); - ProcessCopy(&*mi, &*mbbi, Processed); + processCopy(&*mi); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. @@ -1427,8 +1406,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); unsigned DstReg = mi->getOperand(DstIdx).getReg(); if (SrcReg != DstReg && - TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, - Processed)) { + tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist)) { // The tied operands have been eliminated or shifted further down the // block to ease elimination. Continue processing with 'nmi'. 
TiedOperands.clear(); @@ -1468,7 +1446,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve // SSA form. It's now safe to de-SSA. - MadeChange |= EliminateRegSequences(); + MadeChange |= eliminateRegSequences(); return MadeChange; } @@ -1515,127 +1493,6 @@ static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) { return First; } -/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are -/// EXTRACT_SUBREG from the same register and to the same virtual register -/// with different sub-register indices, attempt to combine the -/// EXTRACT_SUBREGs and pre-coalesce them. e.g. -/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0 -/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6 -/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5 -/// Since D subregs 5, 6 can combine to a Q register, we can coalesce -/// reg1026 to reg1029. -void -TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, - unsigned DstReg) { - SmallSet<unsigned, 4> Seen; - for (unsigned i = 0, e = Srcs.size(); i != e; ++i) { - unsigned SrcReg = Srcs[i]; - if (!Seen.insert(SrcReg)) - continue; - - // Check that the instructions are all in the same basic block. - MachineInstr *SrcDefMI = MRI->getUniqueVRegDef(SrcReg); - MachineInstr *DstDefMI = MRI->getUniqueVRegDef(DstReg); - if (!SrcDefMI || !DstDefMI || - SrcDefMI->getParent() != DstDefMI->getParent()) - continue; - - // If there are no other uses than copies which feed into - // the reg_sequence, then we might be able to coalesce them. 
- bool CanCoalesce = true; - SmallVector<unsigned, 4> SrcSubIndices, DstSubIndices; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SrcReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) { - CanCoalesce = false; - break; - } - SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg()); - DstSubIndices.push_back(UseMI->getOperand(0).getSubReg()); - } - - if (!CanCoalesce || SrcSubIndices.size() < 2) - continue; - - // Check that the source subregisters can be combined. - std::sort(SrcSubIndices.begin(), SrcSubIndices.end()); - unsigned NewSrcSubIdx = 0; - if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices, - NewSrcSubIdx)) - continue; - - // Check that the destination subregisters can also be combined. - std::sort(DstSubIndices.begin(), DstSubIndices.end()); - unsigned NewDstSubIdx = 0; - if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices, - NewDstSubIdx)) - continue; - - // If neither source nor destination can be combined to the full register, - // just give up. This could be improved if it ever matters. - if (NewSrcSubIdx != 0 && NewDstSubIdx != 0) - continue; - - // Now that we know that all the uses are extract_subregs and that those - // subregs can somehow be combined, scan all the extract_subregs again to - // make sure the subregs are in the right order and can be composed. 
- MachineInstr *SomeMI = 0; - CanCoalesce = true; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SrcReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - assert(UseMI->isCopy()); - unsigned DstSubIdx = UseMI->getOperand(0).getSubReg(); - unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg(); - assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination"); - if ((NewDstSubIdx == 0 && - TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) || - (NewSrcSubIdx == 0 && - TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) { - CanCoalesce = false; - break; - } - // Keep track of one of the uses. Preferably the first one which has a - // <def,undef> flag. - if (!SomeMI || UseMI->getOperand(0).isUndef()) - SomeMI = UseMI; - } - if (!CanCoalesce) - continue; - - // Insert a copy to replace the original. - MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, - SomeMI->getDebugLoc(), - TII->get(TargetOpcode::COPY)) - .addReg(DstReg, RegState::Define | - getUndefRegState(SomeMI->getOperand(0).isUndef()), - NewDstSubIdx) - .addReg(SrcReg, 0, NewSrcSubIdx); - - // Remove all the old extract instructions. - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SrcReg), - UE = MRI->use_nodbg_end(); UI != UE; ) { - MachineInstr *UseMI = &*UI; - ++UI; - if (UseMI == CopyMI) - continue; - assert(UseMI->isCopy()); - // Move any kills to the new copy or extract instruction. 
- if (UseMI->getOperand(1).isKill()) { - CopyMI->getOperand(1).setIsKill(); - if (LV) - // Update live variables - LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI); - } - UseMI->eraseFromParent(); - } - } -} - static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, MachineRegisterInfo *MRI) { for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), @@ -1647,7 +1504,7 @@ static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, return false; } -/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part +/// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part /// of the de-ssa process. This replaces sources of REG_SEQUENCE as /// sub-register references of the register defined by REG_SEQUENCE. e.g. /// @@ -1655,7 +1512,7 @@ static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, /// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6 /// => /// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ... -bool TwoAddressInstructionPass::EliminateRegSequences() { +bool TwoAddressInstructionPass::eliminateRegSequences() { if (RegSequences.empty()) return false; @@ -1759,10 +1616,6 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) MO.setIsUndef(); } - // Make sure there is a full non-subreg imp-def operand on the - // instruction. This shouldn't be necessary, but it seems that at least - // RAFast requires it. - Def->addRegisterDefined(DstReg, TRI); DEBUG(dbgs() << "First def: " << *Def); } @@ -1775,12 +1628,6 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { DEBUG(dbgs() << "Eliminated: " << *MI); MI->eraseFromParent(); } - - // Try coalescing some EXTRACT_SUBREG instructions. This can create - // INSERT_SUBREG instructions that must have <undef> flags added by - // LiveIntervalAnalysis, so only run it when LiveVariables is available. 
- if (LV) - CoalesceExtSubRegs(RealSrcs, DstReg); } RegSequences.clear(); diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index 93840f0..bb93bdc 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -19,8 +19,8 @@ #define DEBUG_TYPE "regalloc" #include "VirtRegMap.h" #include "LiveDebugVariables.h" -#include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -127,9 +127,11 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { OS << '\n'; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VirtRegMap::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// // VirtRegRewriter @@ -170,6 +172,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter", INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter", "Virtual Register Rewriter", false, false) @@ -182,6 +185,8 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); AU.addRequired<VirtRegMap>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -197,11 +202,11 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { VRM = &getAnalysis<VirtRegMap>(); DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " - << MF->getFunction()->getName() << '\n'); + << MF->getName() << '\n'); DEBUG(VRM->dump()); // Add kill flags while 
we still have virtual registers. - LIS->addKillFlags(); + LIS->addKillFlags(VRM); // Live-in lists on basic blocks are required for physregs. addMBBLiveIns(); @@ -252,9 +257,6 @@ void VirtRegRewriter::rewrite() { SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; -#ifndef NDEBUG - BitVector Reserved = TRI->getReservedRegs(*MF); -#endif for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { @@ -278,7 +280,7 @@ void VirtRegRewriter::rewrite() { unsigned PhysReg = VRM->getPhys(VirtReg); assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Instruction uses unmapped VirtReg"); - assert(!Reserved.test(PhysReg) && "Reserved register assignment"); + assert(!MRI->isReserved(PhysReg) && "Reserved register assignment"); // Preserve semantics of sub-register operands. if (MO.getSubReg()) { diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.h b/contrib/llvm/lib/CodeGen/VirtRegMap.h index c320985..7974dda 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.h +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.h @@ -63,8 +63,8 @@ namespace llvm { /// createSpillSlot - Allocate a spill slot for RC from MFI. unsigned createSpillSlot(const TargetRegisterClass *RC); - VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT - void operator=(const VirtRegMap&); // DO NOT IMPLEMENT + VirtRegMap(const VirtRegMap&) LLVM_DELETED_FUNCTION; + void operator=(const VirtRegMap&) LLVM_DELETED_FUNCTION; public: static char ID; |